[llvm] RFC: [AMDGPU] Stop using attribute groups in CodeGen tests (PR #86157)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 21 10:15:11 PDT 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/86157
None
From ce7a832884f6966dd2f631944753ee916b49702b Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 21 Mar 2024 17:10:26 +0000
Subject: [PATCH] RFC: [AMDGPU] Stop using attribute groups in CodeGen tests
---
.../AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll | 92 +-
.../AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll | 96 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll | 19 +-
.../AMDGPU/GlobalISel/buffer-schedule.ll | 13 +-
.../GlobalISel/call-outgoing-stack-args.ll | 9 +-
.../GlobalISel/clamp-fmed3-const-combine.ll | 22 +-
.../GlobalISel/clamp-minmax-const-combine.ll | 39 +-
.../GlobalISel/combine-fma-add-ext-fma.ll | 10 +-
.../GlobalISel/combine-fma-add-fma-mul.ll | 13 +-
.../GlobalISel/constant-bus-restriction.ll | 11 +-
.../GlobalISel/dereferenceable-declaration.ll | 14 +-
...-divergent-i1-phis-no-lane-mask-merging.ll | 4 +-
...divergent-i1-phis-no-lane-mask-merging.mir | 4 +-
.../GlobalISel/dropped_debug_info_assert.ll | 4 +-
.../GlobalISel/dynamic-alloca-divergent.ll | 4 +-
.../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll | 34 +-
.../AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll | 6 +-
.../AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll | 6 +-
.../CodeGen/AMDGPU/GlobalISel/floor.f64.ll | 13 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll | 22 +-
.../GlobalISel/fmed3-min-max-const-combine.ll | 39 +-
.../AMDGPU/GlobalISel/fp-atomics-gfx940.ll | 4 +-
.../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 44 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll | 31 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 50 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 50 +-
.../AMDGPU/GlobalISel/function-returns.ll | 136 ++-
.../global-atomic-fadd.f32-no-rtn.ll | 6 +-
.../GlobalISel/global-atomic-fadd.f32-rtn.ll | 6 +-
.../GlobalISel/global-atomic-fadd.f64.ll | 10 +-
.../GlobalISel/image-waterfall-loop-O0.ll | 4 +-
.../GlobalISel/insertelement-stack-lower.ll | 4 +-
.../AMDGPU/GlobalISel/insertelement.large.ll | 7 +-
...irtranslator-amdgpu_kernel-system-sgprs.ll | 8 +-
.../GlobalISel/irtranslator-amdgpu_ps.ll | 13 +-
.../GlobalISel/irtranslator-amdgpu_vs.ll | 12 +-
.../irtranslator-call-implicit-args.ll | 35 +-
.../GlobalISel/irtranslator-call-non-fixed.ll | 24 +-
.../irtranslator-call-return-values.ll | 224 +++--
.../AMDGPU/GlobalISel/irtranslator-call.ll | 366 ++++----
.../GlobalISel/irtranslator-constrained-fp.ll | 49 +-
.../GlobalISel/irtranslator-function-args.ll | 262 +++---
.../GlobalISel/irtranslator-invariant.ll | 8 +-
.../irtranslator-memory-intrinsics.ll | 33 +-
.../GlobalISel/irtranslator-metadata.ll | 4 +-
...rtranslator-readnone-intrinsic-callsite.ll | 7 +-
.../GlobalISel/irtranslator-sibling-call.ll | 55 +-
.../AMDGPU/GlobalISel/lds-global-value.ll | 4 +-
.../CodeGen/AMDGPU/GlobalISel/lds-relocs.ll | 5 +-
...galize-llvm.amdgcn.image.atomic.dim.a16.ll | 56 +-
.../legalize-llvm.amdgcn.image.dim.a16.ll | 95 +-
.../legalize-llvm.amdgcn.image.load.2d.d16.ll | 18 +-
.../legalize-llvm.amdgcn.image.load.2d.ll | 18 +-
...lize-llvm.amdgcn.image.load.2darraymsaa.ll | 6 +-
.../legalize-llvm.amdgcn.image.load.3d.ll | 6 +-
.../legalize-llvm.amdgcn.image.sample.a16.ll | 100 +--
.../legalize-llvm.amdgcn.image.sample.g16.ll | 42 +-
...legalize-llvm.amdgcn.image.store.2d.d16.ll | 10 +-
.../GlobalISel/llvm.amdgcn.dispatch.ptr.ll | 6 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 8 +-
.../GlobalISel/llvm.amdgcn.div.scale.ll | 19 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll | 4 +-
.../GlobalISel/llvm.amdgcn.fmul.legacy.ll | 12 +-
.../llvm.amdgcn.global.atomic.csub.ll | 5 +-
.../llvm.amdgcn.global.atomic.fadd.ll | 6 +-
.../llvm.amdgcn.image.atomic.dim.a16.ll | 82 +-
.../llvm.amdgcn.image.atomic.dim.ll | 82 +-
.../llvm.amdgcn.image.gather4.a16.dim.ll | 30 +-
.../llvm.amdgcn.image.gather4.dim.ll | 32 +-
.../llvm.amdgcn.image.gather4.o.dim.ll | 26 +-
.../llvm.amdgcn.image.getresinfo.a16.ll | 19 +-
.../llvm.amdgcn.image.getresinfo.ll | 25 +-
.../llvm.amdgcn.image.load.1d.d16.ll | 18 +-
.../GlobalISel/llvm.amdgcn.image.load.1d.ll | 18 +-
.../GlobalISel/llvm.amdgcn.image.load.2d.ll | 6 +-
.../llvm.amdgcn.image.load.2darraymsaa.a16.ll | 6 +-
.../llvm.amdgcn.image.load.2darraymsaa.ll | 6 +-
.../llvm.amdgcn.image.load.3d.a16.ll | 6 +-
.../GlobalISel/llvm.amdgcn.image.load.3d.ll | 6 +-
.../llvm.amdgcn.image.sample.cd.g16.ll | 20 +-
.../llvm.amdgcn.image.sample.g16.ll | 26 +-
.../llvm.amdgcn.image.store.2d.d16.ll | 10 +-
.../GlobalISel/llvm.amdgcn.image.store.2d.ll | 12 +-
.../llvm.amdgcn.implicit.ptr.buffer.ll | 7 +-
.../GlobalISel/llvm.amdgcn.interp.inreg.ll | 33 +-
.../GlobalISel/llvm.amdgcn.interp.p1.f16.ll | 12 +-
.../GlobalISel/llvm.amdgcn.is.private.ll | 6 +-
.../GlobalISel/llvm.amdgcn.is.shared.ll | 6 +-
.../llvm.amdgcn.kernarg.segment.ptr.ll | 23 +-
.../GlobalISel/llvm.amdgcn.mfma.gfx90a.ll | 22 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll | 10 +-
.../GlobalISel/llvm.amdgcn.queue.ptr.ll | 6 +-
.../llvm.amdgcn.raw.buffer.atomic.add.ll | 6 +-
.../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 6 +-
.../llvm.amdgcn.raw.buffer.load.format.f16.ll | 10 +-
.../llvm.amdgcn.raw.buffer.load.format.ll | 10 +-
.../llvm.amdgcn.raw.ptr.buffer.atomic.add.ll | 6 +-
.../llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll | 6 +-
...m.amdgcn.raw.ptr.buffer.load.format.f16.ll | 10 +-
.../llvm.amdgcn.raw.ptr.buffer.load.format.ll | 10 +-
.../llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll | 10 +-
.../llvm.amdgcn.raw.ptr.tbuffer.load.ll | 10 +-
.../llvm.amdgcn.raw.tbuffer.load.f16.ll | 10 +-
.../llvm.amdgcn.raw.tbuffer.load.ll | 10 +-
.../GlobalISel/llvm.amdgcn.rsq.clamp.ll | 28 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll | 6 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll | 109 ++-
.../AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll | 4 +-
.../GlobalISel/llvm.amdgcn.set.inactive.ll | 12 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll | 12 +-
.../llvm.amdgcn.struct.buffer.atomic.add.ll | 6 +-
.../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 6 +-
...vm.amdgcn.struct.buffer.load.format.f16.ll | 12 +-
.../llvm.amdgcn.struct.buffer.load.format.ll | 18 +-
.../llvm.amdgcn.struct.buffer.load.ll | 22 +-
...lvm.amdgcn.struct.ptr.buffer.atomic.add.ll | 6 +-
...vm.amdgcn.struct.ptr.buffer.atomic.fadd.ll | 6 +-
...mdgcn.struct.ptr.buffer.load.format.f16.ll | 12 +-
...vm.amdgcn.struct.ptr.buffer.load.format.ll | 18 +-
.../llvm.amdgcn.struct.ptr.buffer.load.ll | 22 +-
...llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll | 10 +-
.../llvm.amdgcn.struct.ptr.tbuffer.load.ll | 10 +-
.../llvm.amdgcn.struct.tbuffer.load.f16.ll | 10 +-
.../llvm.amdgcn.struct.tbuffer.load.ll | 10 +-
.../GlobalISel/llvm.amdgcn.trig.preop.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll | 119 ++-
.../AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll | 4 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll | 4 +-
.../GlobalISel/llvm.amdgcn.update.dpp.ll | 11 +-
.../GlobalISel/llvm.amdgcn.workgroup.id.ll | 15 +-
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 25 +-
.../GlobalISel/llvm.amdgcn.wqm.demote.ll | 54 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll | 12 +-
.../GlobalISel/llvm.amdgcn.writelane.ll | 32 +-
.../AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll | 22 +-
.../CodeGen/AMDGPU/GlobalISel/llvm.powi.ll | 8 +-
.../AMDGPU/GlobalISel/non-entry-alloca.ll | 4 +-
.../regbankselect-amdgcn.image.load.1d.ll | 4 +-
.../regbankselect-amdgcn.image.sample.1d.ll | 4 +-
.../AMDGPU/GlobalISel/regbankselect-load.mir | 23 +-
.../AMDGPU/GlobalISel/regbankselect.mir | 5 +-
.../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 48 +-
.../AMDGPU/GlobalISel/shl-ext-reduce.ll | 4 +-
.../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 48 +-
.../AMDGPU/GlobalISel/strict_fma.f16.ll | 22 +-
.../AMDGPU/GlobalISel/strict_fma.f32.ll | 22 +-
.../AMDGPU/GlobalISel/strict_fma.f64.ll | 22 +-
.../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 60 +-
.../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 60 +-
.../GlobalISel/widen-i8-i16-scalar-loads.ll | 22 +-
llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll | 4 +-
.../abi-attribute-hints-undefined-behavior.ll | 30 +-
llvm/test/CodeGen/AMDGPU/acc-ldst.ll | 32 +-
llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir | 70 +-
llvm/test/CodeGen/AMDGPU/add.i16.ll | 21 +-
llvm/test/CodeGen/AMDGPU/add.ll | 11 +-
llvm/test/CodeGen/AMDGPU/add.v2i16.ll | 31 +-
.../CodeGen/AMDGPU/addrspacecast-captured.ll | 10 +-
.../AMDGPU/addrspacecast-constantexpr.ll | 33 +-
.../test/CodeGen/AMDGPU/addrspacecast.gfx6.ll | 10 +-
llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 61 +-
.../AMDGPU/adjust-writemask-invalid-copy.ll | 19 +-
.../AMDGPU/agpr-copy-no-free-registers.ll | 19 +-
llvm/test/CodeGen/AMDGPU/agpr-csr.ll | 28 +-
.../CodeGen/AMDGPU/agpr-register-count.ll | 26 +-
llvm/test/CodeGen/AMDGPU/agpr-remat.ll | 6 +-
llvm/test/CodeGen/AMDGPU/alignbit-pat.ll | 4 +-
llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll | 31 +-
.../AMDGPU/amdgcn-load-offset-from-reg.ll | 24 +-
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 7 +-
.../AMDGPU/amdgpu-codegenprepare-fdiv.ll | 4 +-
...amdgpu-codegenprepare-fold-binop-select.ll | 4 +-
.../AMDGPU/amdgpu-codegenprepare-sqrt.ll | 4 +-
llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll | 9 +-
.../CodeGen/AMDGPU/amdgpu-nsa-threshold.ll | 21 +-
.../test/CodeGen/AMDGPU/amdgpu-reloc-const.ll | 12 +-
.../AMDGPU/amdgpu-simplify-libcall-ceil.ll | 25 +-
.../amdgpu-simplify-libcall-copysign.ll | 19 +-
.../AMDGPU/amdgpu-simplify-libcall-exp.ll | 52 +-
.../AMDGPU/amdgpu-simplify-libcall-exp2.ll | 52 +-
.../AMDGPU/amdgpu-simplify-libcall-fabs.ll | 25 +-
.../AMDGPU/amdgpu-simplify-libcall-floor.ll | 25 +-
.../AMDGPU/amdgpu-simplify-libcall-fma.ll | 15 +-
.../AMDGPU/amdgpu-simplify-libcall-fmax.ll | 19 +-
.../AMDGPU/amdgpu-simplify-libcall-fmin.ll | 19 +-
.../AMDGPU/amdgpu-simplify-libcall-ldexp.ll | 14 +-
.../AMDGPU/amdgpu-simplify-libcall-log.ll | 52 +-
.../AMDGPU/amdgpu-simplify-libcall-log10.ll | 52 +-
.../AMDGPU/amdgpu-simplify-libcall-log2.ll | 52 +-
.../AMDGPU/amdgpu-simplify-libcall-mad.ll | 15 +-
.../AMDGPU/amdgpu-simplify-libcall-pow.ll | 19 +-
.../AMDGPU/amdgpu-simplify-libcall-pown.ll | 9 +-
.../AMDGPU/amdgpu-simplify-libcall-powr.ll | 19 +-
.../AMDGPU/amdgpu-simplify-libcall-rint.ll | 25 +-
.../AMDGPU/amdgpu-simplify-libcall-rootn.ll | 42 +-
.../AMDGPU/amdgpu-simplify-libcall-round.ll | 25 +-
.../AMDGPU/amdgpu-simplify-libcall-sincos.ll | 91 +-
...dgpu-simplify-libcall-sincos.nobuiltins.ll | 22 +-
.../AMDGPU/amdgpu-simplify-libcall-sqrt.ll | 32 +-
.../AMDGPU/amdgpu-simplify-libcall-trunc.ll | 25 +-
.../CodeGen/AMDGPU/amdgpu-unroll-threshold.ll | 4 +-
.../CodeGen/AMDGPU/amdgpu.private-memory.ll | 33 +-
.../amdgpu.work-item-intrinsics.deprecated.ll | 38 +-
llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 36 +-
.../amdpal-metadata-agpr-register-count.ll | 18 +-
.../CodeGen/AMDGPU/amdpal-msgpack-denormal.ll | 16 +-
.../AMDGPU/amdpal-msgpack-dx10-clamp.ll | 16 +-
.../CodeGen/AMDGPU/amdpal-msgpack-ieee.ll | 16 +-
.../CodeGen/AMDGPU/amdpal-msgpack-psenable.ll | 4 +-
llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll | 4 +-
llvm/test/CodeGen/AMDGPU/amdpal.ll | 6 +-
llvm/test/CodeGen/AMDGPU/and-gcn.ll | 6 +-
llvm/test/CodeGen/AMDGPU/and.ll | 25 +-
llvm/test/CodeGen/AMDGPU/andorbitset.ll | 10 +-
.../annotate-kernel-features-hsa-call.ll | 154 ++--
.../AMDGPU/annotate-kernel-features-hsa.ll | 85 +-
.../AMDGPU/annotate-kernel-features.ll | 57 +-
.../CodeGen/AMDGPU/any_extend_vector_inreg.ll | 4 +-
llvm/test/CodeGen/AMDGPU/anyext.ll | 8 +-
.../AMDGPU/are-loads-from-same-base-ptr.ll | 4 +-
.../test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 12 +-
.../test/CodeGen/AMDGPU/array-ptr-calc-i64.ll | 6 +-
llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll | 21 +-
llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 8 +-
.../AMDGPU/atomics-hw-remarks-gfx90a.ll | 20 +-
...-amdgpu-flat-work-group-size-vgpr-limit.ll | 10 +-
.../attr-amdgpu-flat-work-group-size.ll | 12 +-
.../CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll | 23 +-
.../CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll | 3 +-
.../AMDGPU/attr-amdgpu-num-workgroups.ll | 15 +-
.../attr-amdgpu-num-workgroups_error_check.ll | 30 +-
.../AMDGPU/attr-amdgpu-waves-per-eu.ll | 33 +-
llvm/test/CodeGen/AMDGPU/attr-unparseable.ll | 24 +-
.../back-off-barrier-subtarget-feature.ll | 6 +-
.../CodeGen/AMDGPU/barrier-elimination.ll | 16 +-
llvm/test/CodeGen/AMDGPU/basic-branch.ll | 6 +-
llvm/test/CodeGen/AMDGPU/basic-call-return.ll | 10 +-
llvm/test/CodeGen/AMDGPU/bfe-patterns.ll | 21 +-
llvm/test/CodeGen/AMDGPU/bfm.ll | 10 +-
llvm/test/CodeGen/AMDGPU/big_alu.ll | 19 +-
.../CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll | 13 +-
.../CodeGen/AMDGPU/bitcast-vector-extract.ll | 13 +-
llvm/test/CodeGen/AMDGPU/bitreverse.ll | 59 +-
.../CodeGen/AMDGPU/branch-condition-and.ll | 5 +-
.../CodeGen/AMDGPU/branch-relax-bundle.ll | 9 +-
.../test/CodeGen/AMDGPU/branch-relax-spill.ll | 846 +++++++++---------
.../AMDGPU/branch-relaxation-debug-info.mir | 15 +-
llvm/test/CodeGen/AMDGPU/branch-relaxation.ll | 53 +-
.../AMDGPU/buffer-intrinsics-mmo-offsets.ll | 422 +++++----
llvm/test/CodeGen/AMDGPU/buffer-schedule.ll | 17 +-
llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll | 4 +-
llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll | 5 +-
.../AMDGPU/build-vector-insert-elt-infloop.ll | 4 +-
llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll | 8 +-
.../AMDGPU/call-alias-register-usage-agpr.ll | 10 +-
.../AMDGPU/call-alias-register-usage0.ll | 10 +-
.../AMDGPU/call-alias-register-usage1.ll | 10 +-
.../AMDGPU/call-alias-register-usage2.ll | 10 +-
.../AMDGPU/call-alias-register-usage3.ll | 10 +-
llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 111 ++-
.../CodeGen/AMDGPU/call-argument-types.ll | 210 +++--
llvm/test/CodeGen/AMDGPU/call-constexpr.ll | 26 +-
llvm/test/CodeGen/AMDGPU/call-encoding.ll | 5 +-
.../AMDGPU/call-graph-register-usage.ll | 80 +-
.../AMDGPU/call-preserved-registers.ll | 81 +-
.../CodeGen/AMDGPU/call-reqd-group-size.ll | 4 +-
llvm/test/CodeGen/AMDGPU/call-return-types.ll | 150 ++--
llvm/test/CodeGen/AMDGPU/call-skip.ll | 14 +-
.../AMDGPU/call-to-kernel-undefined.ll | 6 +-
llvm/test/CodeGen/AMDGPU/call-to-kernel.ll | 6 +-
llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 22 +-
llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir | 6 +-
.../test/CodeGen/AMDGPU/callee-frame-setup.ll | 103 +--
.../callee-special-input-sgprs-fixed-abi.ll | 120 ++-
.../callee-special-input-vgprs-packed.ll | 77 +-
.../AMDGPU/callee-special-input-vgprs.ll | 88 +-
.../CodeGen/AMDGPU/calling-conventions.ll | 10 +-
.../CodeGen/AMDGPU/captured-frame-index.ll | 27 +-
.../test/CodeGen/AMDGPU/carryout-selection.ll | 29 +-
llvm/test/CodeGen/AMDGPU/cc-update.ll | 32 +-
.../AMDGPU/cgp-addressing-modes-flat.ll | 18 +-
.../AMDGPU/cgp-addressing-modes-gfx1030.ll | 10 +-
.../AMDGPU/cgp-addressing-modes-gfx908.ll | 10 +-
.../CodeGen/AMDGPU/cgp-addressing-modes.ll | 57 +-
.../CodeGen/AMDGPU/cgp-bitfield-extract.ll | 14 +-
llvm/test/CodeGen/AMDGPU/clamp-modifier.ll | 83 +-
llvm/test/CodeGen/AMDGPU/clamp.ll | 132 ++-
...scer-subranges-another-copymi-not-live.mir | 4 +-
...oalescer-subranges-another-prune-error.mir | 4 +-
.../AMDGPU/coalescer-subregjoin-fullcopy.mir | 4 +-
.../coalescer-with-subregs-bad-identical.mir | 4 +-
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 10 +-
.../CodeGen/AMDGPU/combine-add-zext-xor.ll | 4 +-
.../CodeGen/AMDGPU/combine-and-sext-bool.ll | 6 +-
.../CodeGen/AMDGPU/combine-cond-add-sub.ll | 8 +-
.../CodeGen/AMDGPU/combine_andor_with_cmps.ll | 65 +-
llvm/test/CodeGen/AMDGPU/commute-compares.ll | 211 +++--
llvm/test/CodeGen/AMDGPU/commute-shifts.ll | 10 +-
llvm/test/CodeGen/AMDGPU/commute_modifiers.ll | 61 +-
.../AMDGPU/constant-address-space-32bit.ll | 60 +-
.../AMDGPU/constant-fold-mi-operands.ll | 23 +-
.../AMDGPU/control-flow-fastregalloc.ll | 11 +-
.../CodeGen/AMDGPU/control-flow-optnone.ll | 7 +-
.../test/CodeGen/AMDGPU/convergence-tokens.ll | 23 +-
.../CodeGen/AMDGPU/convergent-inlineasm.ll | 7 +-
.../AMDGPU/copy-vgpr-clobber-spill-vgpr.mir | 4 +-
.../AMDGPU/cross-block-use-is-not-abi-copy.ll | 28 +-
.../CodeGen/AMDGPU/cse-phi-incoming-val.ll | 9 +-
llvm/test/CodeGen/AMDGPU/cube.ll | 13 +-
llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll | 35 +-
llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll | 31 +-
.../CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 37 +-
.../AMDGPU/dagcombine-reassociate-bug.ll | 6 +-
.../AMDGPU/debug-value-scheduler-crash.mir | 9 +-
llvm/test/CodeGen/AMDGPU/debug-value.ll | 18 +-
...-work-group-size-overrides-waves-per-eu.ll | 7 +-
llvm/test/CodeGen/AMDGPU/default-fp-mode.ll | 49 +-
.../CodeGen/AMDGPU/disable_form_clauses.ll | 4 +-
.../AMDGPU/diverge-extra-formal-args.ll | 11 +-
.../AMDGPU/diverge-interp-mov-lower.ll | 18 +-
.../CodeGen/AMDGPU/diverge-switch-default.ll | 7 +-
.../CodeGen/AMDGPU/divergence-driven-abs.ll | 5 +-
.../divergence-driven-negsubinlineconst.ll | 9 +-
.../AMDGPU/divergence-driven-sext-inreg.ll | 13 +-
.../divergent-branch-uniform-condition.ll | 10 +-
llvm/test/CodeGen/AMDGPU/dpp64_combine.ll | 12 +-
llvm/test/CodeGen/AMDGPU/dpp_combine.ll | 10 +-
.../AMDGPU/drop-mem-operand-move-smrd.ll | 10 +-
...ds-negative-offset-addressing-mode-loop.ll | 14 +-
llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll | 50 +-
llvm/test/CodeGen/AMDGPU/ds_read2.ll | 107 ++-
llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll | 48 +-
llvm/test/CodeGen/AMDGPU/ds_read2st64.ll | 59 +-
llvm/test/CodeGen/AMDGPU/ds_write2.ll | 86 +-
llvm/test/CodeGen/AMDGPU/ds_write2st64.ll | 28 +-
.../AMDGPU/dual-source-blend-export.ll | 22 +-
.../AMDGPU/duplicate-attribute-indirect.ll | 4 +-
.../AMDGPU/dwarf-multi-register-use-crash.ll | 4 +-
llvm/test/CodeGen/AMDGPU/early-if-convert.ll | 44 +-
llvm/test/CodeGen/AMDGPU/early-term.mir | 7 +-
llvm/test/CodeGen/AMDGPU/elf.ll | 6 +-
llvm/test/CodeGen/AMDGPU/else.ll | 12 +-
llvm/test/CodeGen/AMDGPU/empty-function.ll | 6 +-
.../AMDGPU/enable-no-signed-zeros-fp-math.ll | 6 +-
llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll | 6 +-
llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll | 12 +-
llvm/test/CodeGen/AMDGPU/exceed-max-sgprs.ll | 15 +-
.../test/CodeGen/AMDGPU/extend-bit-ops-i16.ll | 10 +-
.../CodeGen/AMDGPU/extract_vector_elt-f16.ll | 35 +-
.../CodeGen/AMDGPU/extract_vector_elt-f64.ll | 8 +-
.../CodeGen/AMDGPU/extract_vector_elt-i16.ll | 49 +-
.../CodeGen/AMDGPU/extract_vector_elt-i64.ll | 14 +-
.../CodeGen/AMDGPU/extract_vector_elt-i8.ll | 34 +-
llvm/test/CodeGen/AMDGPU/fabs.f16.ll | 23 +-
.../CodeGen/AMDGPU/fadd-fma-fmul-combine.ll | 29 +-
llvm/test/CodeGen/AMDGPU/fadd.f16.ll | 5 +-
llvm/test/CodeGen/AMDGPU/fadd.ll | 13 +-
llvm/test/CodeGen/AMDGPU/fadd64.ll | 5 +-
.../AMDGPU/fail-select-buffer-atomic-fadd.ll | 4 +-
.../AMDGPU/fcanonicalize-elimination.ll | 70 +-
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 169 ++--
llvm/test/CodeGen/AMDGPU/fcanonicalize.ll | 163 ++--
llvm/test/CodeGen/AMDGPU/fcmp.f16.ll | 5 +-
llvm/test/CodeGen/AMDGPU/fconst64.ll | 5 +-
llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll | 16 +-
llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll | 12 +-
llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll | 10 +-
llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fdiv.f16.ll | 46 +-
llvm/test/CodeGen/AMDGPU/fdiv.f64.ll | 27 +-
.../CodeGen/AMDGPU/fence-lds-read2-write2.ll | 9 +-
llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll | 4 +-
.../test/CodeGen/AMDGPU/fetch-limits.r700+.ll | 4 +-
llvm/test/CodeGen/AMDGPU/ffloor.ll | 14 +-
.../AMDGPU/fix-frame-ptr-reg-copy-livein.ll | 4 +-
.../fix-frame-reg-in-custom-csr-spills.ll | 6 +-
llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll | 12 +-
.../test/CodeGen/AMDGPU/flat-address-space.ll | 39 +-
.../CodeGen/AMDGPU/flat-atomic-fadd.f32.ll | 6 +-
.../CodeGen/AMDGPU/flat-atomic-fadd.f64.ll | 6 +-
llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll | 4 +-
llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll | 8 +-
llvm/test/CodeGen/AMDGPU/fma-combine.ll | 72 +-
llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll | 7 +-
...formation-fmul-distribute-denormal-mode.ll | 14 +-
llvm/test/CodeGen/AMDGPU/fmax3.ll | 20 +-
llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll | 12 +-
llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll | 21 +-
llvm/test/CodeGen/AMDGPU/fmax_legacy.ll | 37 +-
llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll | 22 +-
llvm/test/CodeGen/AMDGPU/fmaxnum.ll | 69 +-
llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll | 47 +-
.../test/CodeGen/AMDGPU/fmed3-cast-combine.ll | 59 +-
llvm/test/CodeGen/AMDGPU/fmed3.ll | 124 ++-
llvm/test/CodeGen/AMDGPU/fmin3.ll | 26 +-
.../CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll | 33 +-
llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll | 12 +-
llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll | 37 +-
llvm/test/CodeGen/AMDGPU/fmin_legacy.ll | 43 +-
llvm/test/CodeGen/AMDGPU/fminnum.f64.ll | 32 +-
llvm/test/CodeGen/AMDGPU/fminnum.ll | 73 +-
llvm/test/CodeGen/AMDGPU/fminnum.r600.ll | 47 +-
.../AMDGPU/fmul-2-combine-multi-use.ll | 31 +-
llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll | 10 +-
llvm/test/CodeGen/AMDGPU/fmul.ll | 8 +-
llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll | 61 +-
llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll | 67 +-
llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll | 27 +-
llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll | 21 +-
llvm/test/CodeGen/AMDGPU/fnearbyint.ll | 25 +-
llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll | 222 +++--
.../CodeGen/AMDGPU/fneg-combines.legal.f16.ll | 34 +-
llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 289 +++---
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 348 ++++---
llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll | 9 +-
llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 11 +-
.../CodeGen/AMDGPU/fneg-modifier-casting.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fneg.f16.ll | 27 +-
.../CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir | 20 +-
llvm/test/CodeGen/AMDGPU/fold-multiple.mir | 4 +-
llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fp-classify.ll | 65 +-
.../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 44 +-
llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll | 11 +-
llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll | 11 +-
llvm/test/CodeGen/AMDGPU/fpext-free.ll | 50 +-
llvm/test/CodeGen/AMDGPU/fpext.f16.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 13 +-
llvm/test/CodeGen/AMDGPU/fract-match.ll | 42 +-
llvm/test/CodeGen/AMDGPU/fract.f64.ll | 15 +-
llvm/test/CodeGen/AMDGPU/fract.ll | 15 +-
.../CodeGen/AMDGPU/frame-index-elimination.ll | 30 +-
.../frame-lowering-entry-all-sgpr-used.mir | 3 +-
.../AMDGPU/frame-lowering-fp-adjusted.mir | 4 +-
...frame-setup-without-sgpr-to-vgpr-spills.ll | 6 +-
llvm/test/CodeGen/AMDGPU/frem.ll | 31 +-
llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll | 29 +-
llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll | 22 +-
.../AMDGPU/fsub-as-fneg-src-modifier.ll | 139 ++-
llvm/test/CodeGen/AMDGPU/fsub.ll | 9 +-
llvm/test/CodeGen/AMDGPU/fsub64.ll | 8 +-
.../CodeGen/AMDGPU/function-args-inreg.ll | 105 ++-
llvm/test/CodeGen/AMDGPU/function-args.ll | 166 ++--
llvm/test/CodeGen/AMDGPU/function-returns.ll | 120 ++-
llvm/test/CodeGen/AMDGPU/gds-allocation.ll | 6 +-
llvm/test/CodeGen/AMDGPU/gds-atomic.ll | 27 +-
.../CodeGen/AMDGPU/gep-const-address-space.ll | 6 +-
.../AMDGPU/gfx-callable-argument-types.ll | 448 +++++-----
.../gfx-callable-preserved-registers.ll | 53 +-
.../AMDGPU/gfx-callable-return-types.ll | 30 +-
.../AMDGPU/gfx11-user-sgpr-init16-bug.ll | 20 +-
llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll | 14 +-
.../AMDGPU/global-atomic-fadd.f32-no-rtn.ll | 6 +-
.../AMDGPU/global-atomic-fadd.f32-rtn.ll | 6 +-
.../CodeGen/AMDGPU/global-atomic-fadd.f64.ll | 10 +-
.../global-atomics-fp-wrong-subtarget.ll | 4 +-
llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll | 26 +-
.../global-saddr-atomics-min-max-system.ll | 2 -
.../AMDGPU/global-saddr-atomics.gfx1030.ll | 4 +-
.../AMDGPU/global-saddr-atomics.gfx908.ll | 6 +-
.../CodeGen/AMDGPU/global-saddr-atomics.ll | 2 -
.../CodeGen/AMDGPU/global-smrd-unknown.ll | 7 +-
.../AMDGPU/global_atomic_optimizer_fp_rtn.ll | 52 +-
.../AMDGPU/global_atomics_iterative_scan.ll | 11 +-
.../global_atomics_iterative_scan_fp.ll | 10 +-
.../global_atomics_optimizer_fp_no_rtn.ll | 52 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 22 +-
.../AMDGPU/global_atomics_scan_fmax.ll | 14 +-
.../AMDGPU/global_atomics_scan_fmin.ll | 14 +-
.../AMDGPU/global_atomics_scan_fsub.ll | 22 +-
llvm/test/CodeGen/AMDGPU/global_smrd.ll | 6 +-
llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll | 7 +-
.../AMDGPU/greedy-global-heuristic.mir | 4 +-
llvm/test/CodeGen/AMDGPU/half.ll | 90 +-
llvm/test/CodeGen/AMDGPU/hoist-cond.ll | 6 +-
llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll | 28 +-
.../hsa-metadata-agpr-register-count.ll | 18 +-
.../CodeGen/AMDGPU/hsa-metadata-agpr-small.ll | 6 +-
...hsa-metadata-dynlds-func-hidden-args-v5.ll | 4 +-
...-metadata-dynlds-funcarg-hidden-args-v5.ll | 4 +-
...-metadata-dynlds-kernarg-hidden-args-v5.ll | 4 +-
.../AMDGPU/hsa-metadata-enqueue-kernel-.ll | 13 +-
.../AMDGPU/hsa-metadata-from-llvm-ir-full.ll | 84 +-
.../CodeGen/AMDGPU/hsa-metadata-heap-v5.ll | 28 +-
.../AMDGPU/hsa-metadata-hidden-args-v4.ll | 18 +-
.../AMDGPU/hsa-metadata-hidden-args-v5.ll | 4 +-
.../hsa-metadata-hostcall-present-asan.ll | 4 +-
.../AMDGPU/hsa-metadata-hostcall-v4.ll | 58 +-
.../AMDGPU/hsa-metadata-hostcall-v5.ll | 28 +-
.../AMDGPU/hsa-metadata-kernel-code-props.ll | 10 +-
.../hsa-metadata-multigrid-sync-arg-v5.ll | 28 +-
.../AMDGPU/hsa-metadata-queueptr-v5.ll | 28 +-
...tadata-resource-usage-function-ordering.ll | 6 +-
.../hsa-metadata-uniform-workgroup-size-v5.ll | 6 +-
llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 7 +-
.../CodeGen/AMDGPU/i1-copy-implicit-def.ll | 4 +-
.../AMDGPU/i1-copy-phi-uniform-branch.ll | 4 +-
llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll | 4 +-
llvm/test/CodeGen/AMDGPU/icmp.i16.ll | 45 +-
.../identical-subrange-spill-infloop.ll | 19 +-
.../AMDGPU/illegal-eviction-assert.mir | 4 +-
.../AMDGPU/illegal-sgpr-to-vgpr-copy.ll | 17 +-
llvm/test/CodeGen/AMDGPU/image-attributes.ll | 30 +-
.../test/CodeGen/AMDGPU/image-load-d16-tfe.ll | 10 +-
llvm/test/CodeGen/AMDGPU/image-resource-id.ll | 46 +-
llvm/test/CodeGen/AMDGPU/image-schedule.ll | 23 +-
llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll | 8 +-
llvm/test/CodeGen/AMDGPU/imm.ll | 6 +-
llvm/test/CodeGen/AMDGPU/immv216.ll | 68 +-
.../CodeGen/AMDGPU/implicit-arg-v5-opt.ll | 35 +-
.../AMDGPU/implicitarg-offset-attributes.ll | 27 +-
.../AMDGPU/indirect-addressing-si-gfx9.ll | 8 +-
.../AMDGPU/indirect-addressing-si-pregfx9.ll | 12 +-
.../CodeGen/AMDGPU/indirect-addressing-si.ll | 28 +-
.../CodeGen/AMDGPU/indirect-private-64.ll | 13 +-
...nfloop-subrange-spill-inspect-subrange.mir | 4 +-
.../CodeGen/AMDGPU/infloop-subrange-spill.mir | 4 +-
.../AMDGPU/inline-asm-reserved-regs.ll | 6 +-
llvm/test/CodeGen/AMDGPU/inline-maxbb.ll | 4 +-
llvm/test/CodeGen/AMDGPU/inlineasm-16.ll | 18 +-
llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll | 26 +-
llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll | 2 -
.../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 4 +-
.../CodeGen/AMDGPU/insert-waitcnts-callee.mir | 4 +-
.../CodeGen/AMDGPU/insert-waitcnts-exp.mir | 6 +-
llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 7 +-
.../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 119 ++-
...insert_vector_elt.v2i16.subtest-nosaddr.ll | 9 +-
.../insert_vector_elt.v2i16.subtest-saddr.ll | 9 +-
llvm/test/CodeGen/AMDGPU/internalize.ll | 14 +-
.../AMDGPU/invariant-load-no-alias-store.ll | 6 +-
.../CodeGen/AMDGPU/invert-br-undef-vcc.mir | 4 +-
llvm/test/CodeGen/AMDGPU/ipra-regmask.ll | 24 +-
.../ipra-return-address-save-restore.ll | 21 +-
llvm/test/CodeGen/AMDGPU/ipra.ll | 25 +-
llvm/test/CodeGen/AMDGPU/kcache-fold.ll | 13 +-
.../AMDGPU/kernel-mubuf-with-voffset.mir | 4 +-
.../kernel-vgpr-spill-mubuf-with-voffset.ll | 4 +-
.../test/CodeGen/AMDGPU/kill-infinite-loop.ll | 16 +-
llvm/test/CodeGen/AMDGPU/known-never-nan.ll | 4 +-
llvm/test/CodeGen/AMDGPU/known-never-snan.ll | 139 ++-
.../CodeGen/AMDGPU/large-alloca-compute.ll | 4 +-
.../CodeGen/AMDGPU/large-alloca-graphics.ll | 6 +-
.../AMDGPU/large-work-group-promote-alloca.ll | 31 +-
llvm/test/CodeGen/AMDGPU/lds-alignment.ll | 36 +-
llvm/test/CodeGen/AMDGPU/lds-bounds.ll | 18 +-
llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 9 +-
.../CodeGen/AMDGPU/lds-m0-init-in-loop.ll | 4 +-
llvm/test/CodeGen/AMDGPU/lds-relocs.ll | 7 +-
...alize-amdgcn.raw.buffer.load.format.f16.ll | 10 +-
.../legalize-amdgcn.raw.buffer.load.format.ll | 10 +-
...e-amdgcn.raw.ptr.buffer.load.format.f16.ll | 10 +-
...alize-amdgcn.raw.ptr.buffer.load.format.ll | 10 +-
...egalize-amdgcn.raw.ptr.tbuffer.load.f16.ll | 10 +-
.../legalize-amdgcn.raw.ptr.tbuffer.load.ll | 10 +-
.../legalize-amdgcn.raw.tbuffer.load.f16.ll | 10 +-
.../legalize-amdgcn.raw.tbuffer.load.ll | 10 +-
llvm/test/CodeGen/AMDGPU/literals.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll | 9 +-
.../AMDGPU/llvm.amdgcn.atomic.cond.sub.ll | 31 +-
.../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 10 +-
.../CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll | 66 +-
.../CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll | 7 +-
.../CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll | 68 +-
.../AMDGPU/llvm.amdgcn.buffer.atomic.ll | 44 +-
.../AMDGPU/llvm.amdgcn.buffer.load.format.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll | 14 +-
.../AMDGPU/llvm.amdgcn.buffer.store.format.ll | 11 +-
.../AMDGPU/llvm.amdgcn.buffer.store.ll | 17 +-
.../AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll | 6 +-
.../AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll | 6 +-
.../AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll | 6 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll | 201 ++---
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll | 9 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll | 7 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll | 7 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll | 7 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll | 7 +-
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll | 7 +-
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll | 18 +-
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll | 18 +-
.../AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll | 28 +-
.../AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll | 28 +-
.../CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll | 30 +-
.../CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll | 7 +-
.../AMDGPU/llvm.amdgcn.dispatch.ptr.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll | 19 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll | 27 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll | 14 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll | 27 +-
...llvm.amdgcn.ds.gws.barrier-fastregalloc.ll | 7 +-
.../AMDGPU/llvm.amdgcn.ds.gws.barrier.ll | 37 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll | 24 +-
.../AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll | 6 +-
.../AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll | 6 +-
.../llvm.amdgcn.ds.gws.sema.release.all.ll | 6 +-
.../AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll | 38 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll | 106 ++-
.../CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll | 7 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll | 10 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll | 7 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll | 11 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll | 25 +-
.../CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll | 24 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll | 13 +-
.../CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll | 27 +-
.../CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll | 23 +-
.../AMDGPU/llvm.amdgcn.groupstaticsize.ll | 19 +-
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll | 10 +-
.../CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll | 10 +-
.../AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir | 3 +-
.../AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir | 3 +-
.../CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll | 17 +-
.../AMDGPU/llvm.amdgcn.image.a16.dim.ll | 88 +-
.../AMDGPU/llvm.amdgcn.image.a16.encode.ll | 88 +-
.../AMDGPU/llvm.amdgcn.image.atomic.dim.ll | 48 +-
.../AMDGPU/llvm.amdgcn.image.d16.dim.ll | 34 +-
.../AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll | 78 +-
.../CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll | 126 ++-
.../llvm.amdgcn.image.gather4.a16.dim.ll | 32 +-
.../llvm.amdgcn.image.gather4.d16.dim.ll | 6 +-
.../AMDGPU/llvm.amdgcn.image.gather4.dim.ll | 34 +-
.../AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll | 28 +-
.../AMDGPU/llvm.amdgcn.image.getlod.dim.ll | 8 +-
.../AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll | 16 +-
.../AMDGPU/llvm.amdgcn.image.load.a16.d16.ll | 3 -
.../AMDGPU/llvm.amdgcn.image.load.a16.ll | 3 -
.../AMDGPU/llvm.amdgcn.image.msaa.load.ll | 23 +-
.../AMDGPU/llvm.amdgcn.image.msaa.load.x.ll | 19 +-
.../CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll | 17 +-
.../llvm.amdgcn.image.sample.a16.dim.ll | 84 +-
.../llvm.amdgcn.image.sample.cd.a16.dim.ll | 20 +-
.../AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll | 20 +-
.../llvm.amdgcn.image.sample.cd.g16.encode.ll | 20 +-
.../AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll | 20 +-
.../llvm.amdgcn.image.sample.d16.dim.ll | 26 +-
.../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 98 +-
.../llvm.amdgcn.image.sample.g16.a16.dim.ll | 88 +-
.../llvm.amdgcn.image.sample.g16.encode.ll | 26 +-
.../AMDGPU/llvm.amdgcn.image.sample.g16.ll | 26 +-
.../AMDGPU/llvm.amdgcn.image.sample.o.dim.ll | 92 +-
.../AMDGPU/llvm.amdgcn.image.store.a16.d16.ll | 3 -
.../AMDGPU/llvm.amdgcn.image.store.a16.ll | 3 -
.../llvm.amdgcn.implicit.ptr.buffer.hsa.ll | 9 +-
.../AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll | 9 +-
.../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 45 +-
.../CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll | 18 +-
.../AMDGPU/llvm.amdgcn.interp.inreg.ll | 33 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll | 47 +-
.../CodeGen/AMDGPU/llvm.amdgcn.is.private.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll | 6 +-
.../AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 23 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll | 18 +-
.../AMDGPU/llvm.amdgcn.lds.direct.load.ll | 7 +-
.../AMDGPU/llvm.amdgcn.lds.param.load.ll | 7 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll | 9 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll | 10 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll | 31 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll | 22 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll | 54 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll | 6 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 54 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll | 22 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll | 10 +-
.../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll | 20 +-
.../AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll | 36 +-
.../CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll | 9 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll | 9 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll | 25 +-
.../CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll | 16 +-
.../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll | 20 +-
.../CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll | 6 +-
.../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 6 +-
.../AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll | 32 +-
.../llvm.amdgcn.raw.buffer.load.format.ll | 8 +-
.../AMDGPU/llvm.amdgcn.raw.buffer.load.ll | 29 +-
.../llvm.amdgcn.raw.buffer.store.format.ll | 11 +-
.../AMDGPU/llvm.amdgcn.raw.buffer.store.ll | 37 +-
.../llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll | 6 +-
.../llvm.amdgcn.raw.ptr.buffer.atomic.ll | 32 +-
.../llvm.amdgcn.raw.ptr.buffer.load.format.ll | 8 +-
.../AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll | 29 +-
...llvm.amdgcn.raw.ptr.buffer.store.format.ll | 11 +-
.../llvm.amdgcn.raw.ptr.buffer.store.ll | 37 +-
.../llvm.amdgcn.raw.ptr.tbuffer.store.ll | 10 +-
.../AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll | 10 +-
.../CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll | 19 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll | 54 +-
.../AMDGPU/llvm.amdgcn.readfirstlane.ll | 17 +-
.../CodeGen/AMDGPU/llvm.amdgcn.readlane.ll | 24 +-
.../CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll | 13 +-
.../CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll | 19 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll | 33 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 10 +-
.../AMDGPU/llvm.amdgcn.s.barrier.wait.ll | 82 +-
.../AMDGPU/llvm.amdgcn.s.dcache.inv.ll | 10 +-
.../AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 10 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 10 +-
.../AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 10 +-
.../AMDGPU/llvm.amdgcn.s.decperflevel.ll | 6 +-
.../llvm.amdgcn.s.get.waveid.in.workgroup.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll | 7 +-
.../AMDGPU/llvm.amdgcn.s.incperflevel.ll | 6 +-
.../AMDGPU/llvm.amdgcn.s.memrealtime.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll | 6 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll | 6 +-
.../AMDGPU/llvm.amdgcn.s.wait.event.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll | 9 +-
.../CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll | 8 +-
.../CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll | 8 +-
.../test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll | 8 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll | 101 +--
.../AMDGPU/llvm.amdgcn.sched.barrier.ll | 15 +-
.../llvm.amdgcn.sched.group.barrier.gfx11.ll | 14 +-
.../AMDGPU/llvm.amdgcn.sched.group.barrier.ll | 40 +-
.../CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll | 12 +-
.../llvm.amdgcn.set.inactive.chain.arg.ll | 16 +-
.../AMDGPU/llvm.amdgcn.set.inactive.ll | 16 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll | 9 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll | 9 +-
.../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll | 26 +-
.../CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll | 6 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll | 10 +-
.../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 6 +-
.../llvm.amdgcn.struct.buffer.atomic.ll | 32 +-
.../llvm.amdgcn.struct.buffer.load.format.ll | 25 +-
....amdgcn.struct.buffer.load.format.v3f16.ll | 6 +-
.../AMDGPU/llvm.amdgcn.struct.buffer.load.ll | 30 +-
.../llvm.amdgcn.struct.buffer.store.format.ll | 13 +-
.../AMDGPU/llvm.amdgcn.struct.buffer.store.ll | 33 +-
...vm.amdgcn.struct.ptr.buffer.atomic.fadd.ll | 6 +-
.../llvm.amdgcn.struct.ptr.buffer.atomic.ll | 32 +-
...vm.amdgcn.struct.ptr.buffer.load.format.ll | 25 +-
...gcn.struct.ptr.buffer.load.format.v3f16.ll | 6 +-
.../llvm.amdgcn.struct.ptr.buffer.load.ll | 30 +-
...m.amdgcn.struct.ptr.buffer.store.format.ll | 13 +-
.../llvm.amdgcn.struct.ptr.buffer.store.ll | 34 +-
.../llvm.amdgcn.struct.ptr.tbuffer.store.ll | 13 +-
.../llvm.amdgcn.struct.tbuffer.store.ll | 13 +-
.../AMDGPU/llvm.amdgcn.tbuffer.store.ll | 13 +-
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll | 109 ++-
.../CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll | 22 +-
.../AMDGPU/llvm.amdgcn.wave.barrier.ll | 13 +-
.../AMDGPU/llvm.amdgcn.wavefrontsize.ll | 10 +-
.../AMDGPU/llvm.amdgcn.workgroup.id.ll | 15 +-
.../CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 15 +-
.../CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll | 56 +-
.../CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll | 12 +-
.../CodeGen/AMDGPU/llvm.amdgcn.writelane.ll | 24 +-
llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll | 9 +-
llvm/test/CodeGen/AMDGPU/llvm.exp.ll | 50 +-
llvm/test/CodeGen/AMDGPU/llvm.exp10.ll | 50 +-
llvm/test/CodeGen/AMDGPU/llvm.exp2.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.frexp.ll | 18 +-
.../CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll | 10 +-
.../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 10 +-
llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll | 32 +-
llvm/test/CodeGen/AMDGPU/llvm.log.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.log2.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll | 20 +-
llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll | 24 +-
llvm/test/CodeGen/AMDGPU/llvm.powi.ll | 8 +-
llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll | 8 +-
.../CodeGen/AMDGPU/llvm.r600.group.barrier.ll | 12 +-
.../AMDGPU/llvm.r600.read.local.size.ll | 38 +-
llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll | 6 +-
llvm/test/CodeGen/AMDGPU/llvm.rint.ll | 14 +-
llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll | 35 +-
llvm/test/CodeGen/AMDGPU/llvm.round.ll | 41 +-
llvm/test/CodeGen/AMDGPU/llvm.sin.ll | 30 +-
llvm/test/CodeGen/AMDGPU/load-constant-f64.ll | 4 +-
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 90 +-
llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 68 +-
llvm/test/CodeGen/AMDGPU/load-constant-i32.ll | 52 +-
llvm/test/CodeGen/AMDGPU/load-constant-i64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-constant-i8.ll | 114 ++-
llvm/test/CodeGen/AMDGPU/load-global-f32.ll | 22 +-
llvm/test/CodeGen/AMDGPU/load-global-f64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-global-i1.ll | 90 +-
llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 68 +-
llvm/test/CodeGen/AMDGPU/load-global-i32.ll | 52 +-
llvm/test/CodeGen/AMDGPU/load-global-i64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-global-i8.ll | 114 ++-
llvm/test/CodeGen/AMDGPU/load-hi16.ll | 106 ++-
llvm/test/CodeGen/AMDGPU/load-input-fold.ll | 8 +-
llvm/test/CodeGen/AMDGPU/load-lo16.ll | 98 +-
.../CodeGen/AMDGPU/load-local-f32-no-ds128.ll | 2 -
llvm/test/CodeGen/AMDGPU/load-local-f32.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-local-f64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-local-i1.ll | 90 +-
llvm/test/CodeGen/AMDGPU/load-local-i16.ll | 66 +-
llvm/test/CodeGen/AMDGPU/load-local-i32.ll | 44 +-
llvm/test/CodeGen/AMDGPU/load-local-i64.ll | 14 +-
llvm/test/CodeGen/AMDGPU/load-local-i8.ll | 114 ++-
llvm/test/CodeGen/AMDGPU/load-weird-sizes.ll | 6 +-
.../CodeGen/AMDGPU/local-memory.amdgcn.ll | 14 +-
llvm/test/CodeGen/AMDGPU/local-memory.ll | 6 +-
llvm/test/CodeGen/AMDGPU/local-memory.r600.ll | 16 +-
.../local-stack-alloc-block-sp-reference.ll | 4 +-
.../AMDGPU/long-branch-reserve-register.ll | 29 +-
llvm/test/CodeGen/AMDGPU/loop-address.ll | 4 +-
llvm/test/CodeGen/AMDGPU/loop_break.ll | 17 +-
.../test/CodeGen/AMDGPU/loop_exit_with_xor.ll | 4 +-
...fer-fat-pointers-unoptimized-debug-data.ll | 4 +-
llvm/test/CodeGen/AMDGPU/lower-kernargs.ll | 84 +-
.../AMDGPU/lower-mem-intrinsics-threshold.ll | 4 +-
.../CodeGen/AMDGPU/lower-mem-intrinsics.ll | 135 ++-
.../AMDGPU/lower-module-lds-check-metadata.ll | 4 +-
.../lower-range-metadata-intrinsic-call.ll | 11 +-
.../lower-work-group-id-intrinsics-hsa.ll | 6 +-
llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll | 21 +-
...ne-sink-temporal-divergence-swdev407790.ll | 67 +-
llvm/test/CodeGen/AMDGPU/mad-combine.ll | 75 +-
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll | 31 +-
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 85 +-
llvm/test/CodeGen/AMDGPU/mad-mix.ll | 86 +-
.../CodeGen/AMDGPU/mad24-get-global-id.ll | 11 +-
llvm/test/CodeGen/AMDGPU/mad_64_32.ll | 39 +-
llvm/test/CodeGen/AMDGPU/mad_uint24.ll | 4 +-
llvm/test/CodeGen/AMDGPU/madak.ll | 22 +-
llvm/test/CodeGen/AMDGPU/madmk.ll | 27 +-
llvm/test/CodeGen/AMDGPU/max-literals.ll | 4 +-
llvm/test/CodeGen/AMDGPU/max-sgprs.ll | 4 +-
llvm/test/CodeGen/AMDGPU/max3.ll | 29 +-
.../CodeGen/AMDGPU/mdt-preserving-crash.ll | 4 +-
llvm/test/CodeGen/AMDGPU/mem-builtins.ll | 18 +-
.../CodeGen/AMDGPU/memcpy-crash-issue63986.ll | 7 +-
.../test/CodeGen/AMDGPU/memcpy-fixed-align.ll | 6 +-
.../memory-legalizer-atomic-insert-end.mir | 7 +-
...er-multiple-mem-operands-nontemporal-1.mir | 20 +-
...er-multiple-mem-operands-nontemporal-2.mir | 6 +-
llvm/test/CodeGen/AMDGPU/merge-load-store.mir | 11 +-
llvm/test/CodeGen/AMDGPU/merge-store-crash.ll | 4 +-
.../test/CodeGen/AMDGPU/merge-store-usedef.ll | 7 +-
llvm/test/CodeGen/AMDGPU/merge-stores.ll | 77 +-
llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll | 19 +-
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 22 +-
.../AMDGPU/mfma-no-register-aliasing.ll | 8 +-
.../AMDGPU/min-waves-per-eu-not-respected.ll | 7 +-
llvm/test/CodeGen/AMDGPU/min3.ll | 33 +-
llvm/test/CodeGen/AMDGPU/minmax.ll | 5 +-
llvm/test/CodeGen/AMDGPU/missing-store.ll | 4 +-
.../CodeGen/AMDGPU/mixed-wave32-wave64.ll | 18 +-
.../CodeGen/AMDGPU/mixed_wave32_wave64.ll | 17 +-
.../move-addr64-rsrc-dead-subreg-writes.ll | 4 +-
.../AMDGPU/move-to-valu-atomicrmw-system.ll | 9 +-
.../CodeGen/AMDGPU/move-to-valu-atomicrmw.ll | 9 +-
.../CodeGen/AMDGPU/move-to-valu-worklist.ll | 4 +-
llvm/test/CodeGen/AMDGPU/movreld-bug.ll | 4 +-
...uf-legalize-operands-non-ptr-intrinsics.ll | 25 +-
.../CodeGen/AMDGPU/mubuf-legalize-operands.ll | 25 +-
.../CodeGen/AMDGPU/mubuf-offset-private.ll | 38 +-
.../mubuf-shader-vgpr-non-ptr-intrinsics.ll | 7 +-
llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll | 7 +-
llvm/test/CodeGen/AMDGPU/mubuf.ll | 12 +-
.../CodeGen/AMDGPU/mul24-pass-ordering.ll | 18 +-
llvm/test/CodeGen/AMDGPU/mul_int24.ll | 11 +-
.../AMDGPU/multi-divergent-exit-region.ll | 47 +-
llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 7 +-
.../AMDGPU/need-fp-from-vgpr-spills.ll | 15 +-
llvm/test/CodeGen/AMDGPU/nested-calls.ll | 10 +-
.../CodeGen/AMDGPU/nested-loop-conditions.ll | 13 +-
.../CodeGen/AMDGPU/no-dup-inst-prefetch.ll | 7 +-
.../AMDGPU/no-source-locations-in-prologue.ll | 6 +-
llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 7 +-
llvm/test/CodeGen/AMDGPU/nsa-reassign.ll | 4 +-
llvm/test/CodeGen/AMDGPU/nsa-reassign.mir | 6 +-
llvm/test/CodeGen/AMDGPU/occupancy-levels.ll | 86 +-
llvm/test/CodeGen/AMDGPU/omod.ll | 98 +-
.../CodeGen/AMDGPU/opencl-image-metadata.ll | 2 -
llvm/test/CodeGen/AMDGPU/operand-folding.ll | 21 +-
...rlapping-tuple-copy-implicit-op-failure.ll | 4 +-
llvm/test/CodeGen/AMDGPU/pack.v2f16.ll | 43 +-
llvm/test/CodeGen/AMDGPU/pack.v2i16.ll | 39 +-
llvm/test/CodeGen/AMDGPU/packed-op-sel.ll | 55 +-
.../AMDGPU/pal-metadata-3.0-callable.ll | 36 +-
llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll | 10 +-
.../AMDGPU/pal-simple-indirect-call.ll | 4 +-
...al-regcopy-and-spill-missed-at-regalloc.ll | 4 +-
.../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 171 ++--
...partially-dead-super-register-immediate.ll | 9 +-
.../AMDGPU/pei-build-spill-partial-agpr.mir | 19 +-
.../AMDGPU/post-ra-soft-clause-dbg-info.ll | 7 +-
llvm/test/CodeGen/AMDGPU/predicate-dp4.ll | 3 +-
.../AMDGPU/preload-kernargs-IR-lowering.ll | 16 +-
.../AMDGPU/preload-kernargs-inreg-hints.ll | 22 +-
.../AMDGPU/preserve-wwm-copy-dst-reg.ll | 4 +-
llvm/test/CodeGen/AMDGPU/printf_builtin.ll | 7 +-
llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll | 4 +-
.../AMDGPU/private-access-no-objects.ll | 10 +-
.../CodeGen/AMDGPU/private-memory-r600.ll | 28 +-
.../AMDGPU/prologue-epilogue-markers.ll | 4 +-
.../AMDGPU/promote-alloca-addrspacecast.ll | 4 +-
.../AMDGPU/promote-alloca-array-allocation.ll | 6 +-
.../AMDGPU/promote-alloca-bitcast-function.ll | 14 +-
.../promote-alloca-invariant-markers.ll | 18 +-
.../CodeGen/AMDGPU/promote-alloca-lifetime.ll | 10 +-
.../AMDGPU/promote-alloca-mem-intrinsics.ll | 29 +-
.../CodeGen/AMDGPU/promote-alloca-no-opts.ll | 7 +-
.../promote-alloca-padding-size-estimate.ll | 8 +-
.../promote-alloca-stored-pointer-value.ll | 10 +-
.../promote-alloca-to-lds-constantexpr-use.ll | 18 +-
.../AMDGPU/promote-alloca-to-lds-icmp.ll | 12 +-
.../AMDGPU/promote-alloca-to-lds-phi.ll | 18 +-
.../AMDGPU/promote-alloca-to-lds-select.ll | 19 +-
.../promote-alloca-unhandled-intrinsic.ll | 8 +-
.../CodeGen/AMDGPU/promote-alloca-volatile.ll | 4 +-
.../AMDGPU/promote-constOffset-to-imm.ll | 4 +-
.../test/CodeGen/AMDGPU/promote-vect3-load.ll | 14 +-
.../propagate-attributes-bitcast-function.ll | 7 +-
.../AMDGPU/propagate-flat-work-group-size.ll | 31 +-
.../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 69 +-
.../CodeGen/AMDGPU/ps-shader-arg-count.ll | 39 +-
llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll | 10 +-
llvm/test/CodeGen/AMDGPU/ptrmask.ll | 6 +-
llvm/test/CodeGen/AMDGPU/pv-packing.ll | 4 +-
llvm/test/CodeGen/AMDGPU/pv.ll | 17 +-
.../AMDGPU/r600-constant-array-fixup.ll | 4 +-
...nite-loop-bug-while-reorganizing-vector.ll | 8 +-
llvm/test/CodeGen/AMDGPU/r600.add.ll | 11 +-
.../AMDGPU/r600.work-item-intrinsics.ll | 28 +-
llvm/test/CodeGen/AMDGPU/rcp_iflag.ll | 11 +-
llvm/test/CodeGen/AMDGPU/read_register.ll | 20 +-
llvm/test/CodeGen/AMDGPU/readcyclecounter.ll | 8 +-
llvm/test/CodeGen/AMDGPU/readsteadycounter.ll | 6 +-
.../AMDGPU/reassoc-mul-add-1-to-mad.ll | 14 +-
.../AMDGPU/reduce-load-width-alignment.ll | 8 +-
.../AMDGPU/reduce-store-width-alignment.ll | 12 +-
.../AMDGPU/reg-coalescer-sched-crash.ll | 7 +-
...-unsatisfiable-overlapping-tuple-hints.mir | 4 +-
...lloc-failure-overlapping-insert-assert.mir | 4 +-
.../regalloc-illegal-eviction-assert.ll | 4 +-
.../regalloc-introduces-copy-sgpr-to-agpr.mir | 4 +-
.../remaining-virtual-register-operands.ll | 10 +-
...emove-incompatible-extended-image-insts.ll | 4 +-
.../AMDGPU/remove-incompatible-functions.ll | 43 +-
.../CodeGen/AMDGPU/remove-incompatible-gws.ll | 11 +-
.../AMDGPU/remove-incompatible-s-time.ll | 7 +-
.../remove-incompatible-wave32-feature.ll | 4 +-
llvm/test/CodeGen/AMDGPU/repeated-divisor.ll | 29 +-
.../CodeGen/AMDGPU/reqd-work-group-size.ll | 77 +-
llvm/test/CodeGen/AMDGPU/ret.ll | 42 +-
llvm/test/CodeGen/AMDGPU/ret_jump.ll | 28 +-
.../CodeGen/AMDGPU/rewrite-out-arguments.ll | 120 ++-
.../AMDGPU/rewrite-partial-reg-uses-dbg.mir | 4 +-
.../CodeGen/AMDGPU/rewrite-undef-for-phi.ll | 8 +-
llvm/test/CodeGen/AMDGPU/roundeven.ll | 24 +-
llvm/test/CodeGen/AMDGPU/rsq.f32.ll | 2 -
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 6 +-
llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll | 4 +-
llvm/test/CodeGen/AMDGPU/s_addk_i32.ll | 7 +-
llvm/test/CodeGen/AMDGPU/s_code_end.ll | 8 +-
llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll | 7 +-
llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 37 +-
.../CodeGen/AMDGPU/sampler-resource-id.ll | 10 +-
.../CodeGen/AMDGPU/scalar_to_vector.v8i16.ll | 13 +-
.../scc-clobbered-sgpr-to-vmem-spill.ll | 95 +-
.../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 39 +-
.../AMDGPU/sched.barrier.inverted.mask.ll | 73 +-
.../AMDGPU/schedule-fs-loop-nested-if.ll | 9 +-
.../CodeGen/AMDGPU/schedule-fs-loop-nested.ll | 11 +-
llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll | 11 +-
.../CodeGen/AMDGPU/schedule-global-loads.ll | 5 +-
llvm/test/CodeGen/AMDGPU/schedule-if-2.ll | 4 +-
llvm/test/CodeGen/AMDGPU/schedule-ilp.ll | 7 +-
...schedule-regpressure-ilp-metric-spills.mir | 4 +-
.../schedule-regpressure-limit-clustering.ll | 7 +-
.../AMDGPU/schedule-regpressure-limit.ll | 4 +-
.../AMDGPU/schedule-regpressure-limit2.ll | 3 +-
.../AMDGPU/schedule-regpressure-limit3.ll | 7 +-
.../schedule-regpressure-misched-max-waves.ll | 7 +-
.../AMDGPU/schedule-relaxed-occupancy.ll | 7 +-
.../CodeGen/AMDGPU/schedule-xdl-resource.ll | 4 +-
.../AMDGPU/scheduler-subrange-crash.ll | 13 +-
llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll | 46 +-
.../AMDGPU/select-fabs-fneg-extract-legacy.ll | 7 +-
.../AMDGPU/select-fabs-fneg-extract.f16.ll | 8 +-
.../select-fabs-fneg-extract.legal.f16.ll | 4 +-
.../AMDGPU/select-fabs-fneg-extract.ll | 115 ++-
.../AMDGPU/select-fabs-fneg-extract.v2f16.ll | 8 +-
llvm/test/CodeGen/AMDGPU/select-opt.ll | 20 +-
llvm/test/CodeGen/AMDGPU/select-vectors.ll | 61 +-
.../AMDGPU/set-inactive-wwm-overwrite.ll | 12 +-
.../CodeGen/AMDGPU/setcc-fneg-constant.ll | 34 +-
llvm/test/CodeGen/AMDGPU/setcc-sext.ll | 6 +-
llvm/test/CodeGen/AMDGPU/setcc.ll | 62 +-
llvm/test/CodeGen/AMDGPU/setcc64.ll | 54 +-
.../AMDGPU/sext-divergence-driven-isel.ll | 5 +-
llvm/test/CodeGen/AMDGPU/sext-in-reg.ll | 81 +-
llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll | 8 +-
llvm/test/CodeGen/AMDGPU/sgpr-copy.ll | 92 +-
...sgpr-spill-incorrect-fi-bookkeeping-bug.ll | 95 +-
.../CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll | 35 +-
.../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 4 +-
llvm/test/CodeGen/AMDGPU/sgpr-spill.mir | 6 +-
.../AMDGPU/sgpr-spills-split-regalloc.ll | 45 +-
llvm/test/CodeGen/AMDGPU/sgprcopies.ll | 15 +-
llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll | 4 +-
.../CodeGen/AMDGPU/shift-and-i128-ubfe.ll | 17 +-
.../test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll | 51 +-
llvm/test/CodeGen/AMDGPU/shl.ll | 14 +-
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll | 21 +-
llvm/test/CodeGen/AMDGPU/shl_add_constant.ll | 21 +-
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll | 101 +--
llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll | 6 +-
.../test/CodeGen/AMDGPU/shl_add_ptr_global.ll | 9 +-
.../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 69 +-
.../CodeGen/AMDGPU/si-annotate-cf-kill.ll | 13 +-
.../CodeGen/AMDGPU/si-annotate-cf-noloop.ll | 11 +-
.../AMDGPU/si-annotate-cf-unreachable.ll | 7 +-
llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 8 +-
.../CodeGen/AMDGPU/si-annotate-dbg-info.ll | 4 +-
.../AMDGPU/si-lower-control-flow-kill.ll | 4 +-
...si-lower-control-flow-unreachable-block.ll | 12 +-
.../si-opt-vgpr-liverange-bug-deadlanes.mir | 4 +-
.../si-optimize-vgpr-live-range-dbg-instr.ll | 4 +-
.../si-optimize-vgpr-live-range-dbg-instr.mir | 4 +-
llvm/test/CodeGen/AMDGPU/si-scheduler.ll | 30 +-
llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll | 252 +++---
llvm/test/CodeGen/AMDGPU/si-spill-cf.ll | 21 +-
.../AMDGPU/si-triv-disjoint-mem-access.ll | 37 +-
llvm/test/CodeGen/AMDGPU/si-vector-hang.ll | 4 +-
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 61 +-
llvm/test/CodeGen/AMDGPU/sign_extend.ll | 6 +-
llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 33 +-
.../AMDGPU/simplifydemandedbits-recursion.ll | 8 +-
llvm/test/CodeGen/AMDGPU/sink-image-sample.ll | 15 +-
llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll | 21 +-
llvm/test/CodeGen/AMDGPU/sint_to_fp.ll | 21 +-
llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll | 10 +-
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 43 +-
.../skip-promote-alloca-vector-users.ll | 8 +-
llvm/test/CodeGen/AMDGPU/smed3.ll | 110 ++-
llvm/test/CodeGen/AMDGPU/sminmax.ll | 5 +-
llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll | 25 +-
llvm/test/CodeGen/AMDGPU/smrd.ll | 126 ++-
llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll | 4 +-
.../AMDGPU/snippet-copy-bundle-regression.mir | 4 +-
.../soft-clause-exceeds-register-budget.ll | 11 +-
llvm/test/CodeGen/AMDGPU/sopk-compares.ll | 81 +-
llvm/test/CodeGen/AMDGPU/spill-agpr.ll | 13 +-
.../AMDGPU/spill-alloc-sgpr-init-bug.ll | 6 +-
.../test/CodeGen/AMDGPU/spill-cfg-position.ll | 6 +-
.../AMDGPU/spill-csr-frame-ptr-reg-copy.ll | 9 +-
llvm/test/CodeGen/AMDGPU/spill-m0.ll | 35 +-
.../AMDGPU/spill-offset-calculation.ll | 13 +-
.../CodeGen/AMDGPU/spill-regpressure-less.mir | 4 +-
.../CodeGen/AMDGPU/spill-scavenge-offset.ll | 9 +-
.../AMDGPU/spill-sgpr-stack-no-sgpr.ll | 15 +-
.../CodeGen/AMDGPU/spill-special-sgpr.mir | 4 +-
.../CodeGen/AMDGPU/spill-vector-superclass.ll | 4 +-
.../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 6 +-
llvm/test/CodeGen/AMDGPU/spill-vgpr.ll | 14 +-
llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll | 44 +-
.../CodeGen/AMDGPU/split-arg-dbg-value.ll | 19 +-
llvm/test/CodeGen/AMDGPU/split-smrd.ll | 16 +-
.../AMDGPU/split-vector-memoperand-offsets.ll | 15 +-
llvm/test/CodeGen/AMDGPU/splitkit.mir | 8 +-
llvm/test/CodeGen/AMDGPU/sra.ll | 8 +-
.../AMDGPU/sramecc-subtarget-feature-any.ll | 4 +-
.../sramecc-subtarget-feature-disabled.ll | 4 +-
.../sramecc-subtarget-feature-enabled.ll | 4 +-
llvm/test/CodeGen/AMDGPU/srl.ll | 6 +-
llvm/test/CodeGen/AMDGPU/ssubsat.ll | 26 +-
...tack-pointer-offset-relative-frameindex.ll | 4 +-
.../CodeGen/AMDGPU/stack-size-overflow.ll | 13 +-
llvm/test/CodeGen/AMDGPU/store-barrier.ll | 7 +-
llvm/test/CodeGen/AMDGPU/store-global.ll | 4 +-
llvm/test/CodeGen/AMDGPU/store-hi16.ll | 68 +-
llvm/test/CodeGen/AMDGPU/store-private.ll | 5 +-
llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll | 14 +-
llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll | 31 +-
llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll | 30 +-
llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll | 25 +-
llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll | 22 +-
llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll | 22 +-
llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll | 22 +-
llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll | 31 +-
llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll | 30 +-
llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll | 25 +-
llvm/test/CodeGen/AMDGPU/strict_fpext.ll | 51 +-
llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll | 38 +-
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll | 31 +-
llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll | 30 +-
llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll | 25 +-
llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll | 27 +-
llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll | 27 +-
llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll | 27 +-
.../AMDGPU/strictfp_f16_abi_promote.ll | 64 +-
llvm/test/CodeGen/AMDGPU/sub.i16.ll | 25 +-
llvm/test/CodeGen/AMDGPU/sub.v2i16.ll | 31 +-
.../CodeGen/AMDGPU/subreg-coalescer-crash.ll | 13 +-
.../AMDGPU/subreg-coalescer-undef-use.ll | 6 +-
llvm/test/CodeGen/AMDGPU/swdev380865.ll | 8 +-
.../switch-default-block-unreachable.ll | 13 +-
llvm/test/CodeGen/AMDGPU/swizzle-export.ll | 4 +-
llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll | 8 +-
.../AMDGPU/tail-duplication-convergent.ll | 30 +-
llvm/test/CodeGen/AMDGPU/target-cpu.ll | 28 +-
.../AMDGPU/target-mem-intrinsic-metadata.ll | 4 +-
.../AMDGPU/tid-mul-func-xnack-all-off.ll | 8 +-
.../AMDGPU/tid-mul-func-xnack-all-on.ll | 8 +-
.../AMDGPU/tid-mul-func-xnack-any-off-1.ll | 4 +-
.../AMDGPU/tid-mul-func-xnack-any-off-2.ll | 4 +-
.../AMDGPU/tid-mul-func-xnack-any-on-1.ll | 4 +-
.../AMDGPU/tid-mul-func-xnack-any-on-2.ll | 4 +-
.../tid-mul-func-xnack-invalid-any-off-on.ll | 7 +-
.../CodeGen/AMDGPU/tid-one-func-xnack-off.ll | 4 +-
.../CodeGen/AMDGPU/tid-one-func-xnack-on.ll | 4 +-
.../transform-block-with-return-to-epilog.ll | 12 +-
llvm/test/CodeGen/AMDGPU/trap-abis.ll | 7 +-
llvm/test/CodeGen/AMDGPU/trap.ll | 7 +-
llvm/test/CodeGen/AMDGPU/uaddo.ll | 34 +-
llvm/test/CodeGen/AMDGPU/uaddsat.ll | 26 +-
llvm/test/CodeGen/AMDGPU/udivrem24.ll | 10 +-
llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll | 21 +-
llvm/test/CodeGen/AMDGPU/uint_to_fp.ll | 23 +-
llvm/test/CodeGen/AMDGPU/umed3.ll | 116 ++-
.../CodeGen/AMDGPU/unaligned-load-store.ll | 74 +-
.../AMDGPU/undefined-physreg-sgpr-spill.mir | 6 +-
.../AMDGPU/undefined-subreg-liverange.ll | 11 +-
.../unhandled-loop-condition-assertion.ll | 8 +-
.../AMDGPU/uniform-branch-intrinsic-cond.ll | 6 +-
llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 14 +-
llvm/test/CodeGen/AMDGPU/uniform-crash.ll | 6 +-
.../AMDGPU/uniform-loop-inside-nonuniform.ll | 6 +-
.../CodeGen/AMDGPU/uniform-phi-with-undef.ll | 4 +-
.../uniform-work-group-attribute-missing.ll | 4 +-
.../AMDGPU/uniform-work-group-multistep.ll | 6 +-
...niform-work-group-nested-function-calls.ll | 4 +-
...ork-group-prevent-attribute-propagation.ll | 4 +-
.../uniform-work-group-propagate-attribute.ll | 12 +-
.../uniform-work-group-recursion-test.ll | 8 +-
.../CodeGen/AMDGPU/uniform-work-group-test.ll | 6 +-
.../CodeGen/AMDGPU/unigine-liveness-crash.ll | 23 +-
llvm/test/CodeGen/AMDGPU/unpack-half.ll | 4 +-
.../CodeGen/AMDGPU/unsupported-image-a16.ll | 4 +-
.../CodeGen/AMDGPU/unsupported-image-g16.ll | 4 +-
.../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 93 +-
llvm/test/CodeGen/AMDGPU/usubo.ll | 33 +-
llvm/test/CodeGen/AMDGPU/usubsat.ll | 26 +-
llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll | 8 +-
llvm/test/CodeGen/AMDGPU/v_cndmask.ll | 95 +-
llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll | 22 +-
llvm/test/CodeGen/AMDGPU/v_mac.ll | 33 +-
llvm/test/CodeGen/AMDGPU/v_mac_f16.ll | 54 +-
llvm/test/CodeGen/AMDGPU/v_madak_f16.ll | 6 +-
llvm/test/CodeGen/AMDGPU/v_pack.ll | 23 +-
llvm/test/CodeGen/AMDGPU/valu-i1.ll | 17 +-
.../CodeGen/AMDGPU/vector-alloca-limits.ll | 27 +-
.../CodeGen/AMDGPU/vector-extract-insert.ll | 13 +-
.../CodeGen/AMDGPU/vector_shuffle.packed.ll | 8 +-
.../CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll | 46 +-
...r-descriptor-waterfall-loop-idom-update.ll | 6 +-
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 18 +-
llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll | 12 +-
...vgpr-spill-emergency-stack-slot-compute.ll | 9 +-
.../AMDGPU/vgpr-spill-emergency-stack-slot.ll | 82 +-
.../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 16 +-
.../CodeGen/AMDGPU/vi-removed-intrinsics.ll | 7 +-
llvm/test/CodeGen/AMDGPU/vop-shrink.ll | 7 +-
llvm/test/CodeGen/AMDGPU/wait.ll | 24 +-
llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll | 13 +-
llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir | 8 +-
llvm/test/CodeGen/AMDGPU/wave32.ll | 77 +-
llvm/test/CodeGen/AMDGPU/while-break.ll | 6 +-
.../AMDGPU/whole-wave-register-copy.ll | 4 +-
.../AMDGPU/whole-wave-register-spill.ll | 6 +-
.../CodeGen/AMDGPU/widen-vselect-and-mask.ll | 7 +-
.../AMDGPU/widen_extending_scalar_loads.ll | 50 +-
llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll | 18 +-
llvm/test/CodeGen/AMDGPU/wqm.ll | 96 +-
.../AMDGPU/write-register-vgpr-into-sgpr.ll | 12 +-
llvm/test/CodeGen/AMDGPU/write_register.ll | 45 +-
.../CodeGen/AMDGPU/wrong-transalu-pos-fix.ll | 53 +-
.../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 4 +-
.../AMDGPU/xnack-subtarget-feature-any.ll | 4 +-
.../xnack-subtarget-feature-disabled.ll | 4 +-
.../AMDGPU/xnack-subtarget-feature-enabled.ll | 4 +-
llvm/test/CodeGen/AMDGPU/zero_extend.ll | 12 +-
.../AMDGPU/zext-divergence-driven-isel.ll | 5 +-
llvm/test/CodeGen/AMDGPU/zext-lid.ll | 24 +-
1203 files changed, 14188 insertions(+), 17701 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index b04bc04ab22691..24d283aef339c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -11,9 +11,9 @@
@lds0 = internal addrspace(3) global [512 x i32] undef
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind speculatable willreturn memory(none)
-define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -92,7 +92,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -172,7 +172,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -229,7 +229,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -287,7 +287,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -361,7 +361,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -440,7 +440,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -519,7 +519,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_system(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -581,7 +581,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -648,7 +648,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -715,7 +715,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_system(ptr addrspa
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -807,7 +807,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -882,7 +882,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -962,7 +962,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1049,7 +1049,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1136,7 +1136,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1203,7 +1203,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1277,7 +1277,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1351,7 +1351,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1467,7 +1467,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1559,7 +1559,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1654,7 +1654,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1756,7 +1756,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1828,7 +1828,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1907,7 +1907,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1986,7 +1986,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_ret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2114,7 +2114,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_dec_noret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2211,7 +2211,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #1 {
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
; CI-LABEL: atomic_dec_shl_base_lds_0:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2293,7 +2293,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind memory(none)
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
@@ -2302,7 +2302,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -2386,7 +2386,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -2471,7 +2471,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -2533,7 +2533,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_dec_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -2596,7 +2596,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2675,7 +2675,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2759,7 +2759,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2843,7 +2843,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_system(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2910,7 +2910,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2982,7 +2982,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3054,7 +3054,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_system(ptr addrspa
ret void
}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_ret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -3151,7 +3151,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_dec_noret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3231,7 +3231,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa
ret void
}
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #1 {
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
; CI-LABEL: atomic_dec_shl_base_lds_0_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -3318,7 +3318,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind memory(none)
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8
@@ -3326,9 +3326,5 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
store i64 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind speculatable willreturn memory(none) }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index f6a997fb0fb01b..6fef00a7d068af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -11,9 +11,9 @@
@lds0 = internal addrspace(3) global [512 x i32] undef, align 4
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind speculatable willreturn memory(none)
-define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -92,7 +92,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -172,7 +172,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -229,7 +229,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -287,7 +287,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -361,7 +361,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -440,7 +440,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i32_offset_sistem:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -519,7 +519,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_sistem(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -581,7 +581,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) #1
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -648,7 +648,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -715,7 +715,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_system(ptr addrspa
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -807,7 +807,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -882,7 +882,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspa
ret void
}
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #1 {
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
; CI-LABEL: atomic_inc_shl_base_lds_0_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -964,7 +964,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind memory(none)
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
@@ -973,7 +973,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -1057,7 +1057,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -1142,7 +1142,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: lds_atomic_inc_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x0
@@ -1267,7 +1267,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1346,7 +1346,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1430,7 +1430,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1514,7 +1514,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_system(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1581,7 +1581,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) #1
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1653,7 +1653,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1725,7 +1725,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_system(ptr addrspa
ret void
}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_ret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1822,7 +1822,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace
ret void
}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) nounwind {
; CI-LABEL: global_atomic_inc_noret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1902,7 +1902,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspa
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1982,7 +1982,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2069,7 +2069,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2156,7 +2156,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2223,7 +2223,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i32_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2297,7 +2297,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i32_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2371,7 +2371,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2487,7 +2487,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i32_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2579,7 +2579,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #1 {
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
; CI-LABEL: atomic_inc_shl_base_lds_0_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2666,7 +2666,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind memory(none)
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
%result = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8
@@ -2675,7 +2675,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2770,7 +2770,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2872,7 +2872,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2974,7 +2974,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3046,7 +3046,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i64_offset:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3125,7 +3125,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i64_offset_system:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3204,7 +3204,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_ret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -3332,7 +3332,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
ret void
}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) nounwind {
; CI-LABEL: flat_atomic_inc_noret_i64_offset_addr64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -3429,7 +3429,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
ret void
}
-define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) nounwind {
; CI-LABEL: nocse_lds_atomic_inc_ret_i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s6, s[4:5], 0x4
@@ -3531,9 +3531,5 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
store i32 %result1, ptr addrspace(1) %out1, align 4
ret void
}
-
-attributes #0 = { nounwind speculatable willreturn memory(none) }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
index 132dc876b3b054..b497dc5a2d1db6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
@@ -677,14 +677,11 @@ define i64 @v_bswap_i48(i64 %src) {
ret i64 %zext
}
-declare i16 @llvm.bswap.i16(i16) #1
-declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
-declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>) #1
-declare i32 @llvm.bswap.i32(i32) #1
-declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
-declare i64 @llvm.bswap.i64(i64) #1
-declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #1
-declare i48 @llvm.bswap.i48(i48) #1
-
-attributes #0 = { convergent nounwind readnone }
-attributes #1 = { nounwind readnone speculatable willreturn }
+declare i16 @llvm.bswap.i16(i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>) nounwind readnone speculatable willreturn
+declare i32 @llvm.bswap.i32(i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone speculatable willreturn
+declare i64 @llvm.bswap.i64(i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone speculatable willreturn
+declare i48 @llvm.bswap.i48(i48) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
index e5aa82278a830e..fa2a31bd1683bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
@@ -39,15 +39,10 @@ define amdgpu_cs void @test1_ptrs_reorderable(ptr addrspace(8) inreg %buf, i32 %
ret void
}
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) nounwind readonly
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) nounwind writeonly
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) #4
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) nounwind memory(argmem: read)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) #5
-
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind writeonly }
-attributes #4 = { nounwind memory(argmem: read) }
-attributes #5 = { nounwind memory(argmem: write) }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) nounwind memory(argmem: write)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 24652982c6584f..a6f85b19325e21 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -6,8 +6,8 @@
; stack. This test is likely redundant when all DAG and GlobalISel
; tests are unified.
-declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) #0
-declare hidden void @external_void_func_byval(ptr addrspace(5) byval([16 x i32])) #0
+declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_byval(ptr addrspace(5) byval([16 x i32])) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
define amdgpu_kernel void @kernel_caller_stack() {
; MUBUF-LABEL: kernel_caller_stack:
@@ -449,7 +449,4 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
ret void
}
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #1
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-attributes #1 = { argmemonly nofree nounwind willreturn writeonly }
+declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) argmemonly nofree nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index c7676e9da6f499..fa4204bd96e25d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
+define float @test_fmed3_f32_known_nnan_ieee_true(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -23,7 +23,7 @@ define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
ret float %fmed
}
-define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
+define half @test_fmed3_f16_known_nnan_ieee_false(half %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -45,7 +45,7 @@ define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
}
; %fmin is known non-SNaN because fmin inputs are fcanonicalized
-define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
+define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true" {
; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -70,7 +70,7 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
}
; input may be SNaN. It's safe to clamp since third operand in fmed3 is 0.0
-define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true(float %a) #2 {
+define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true(float %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true" {
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -93,7 +93,7 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp
; global nnan function attribute always forces clamp combine
-define float @test_fmed3_global_nnan(float %a) #3 {
+define float @test_fmed3_global_nnan(float %a) "no-nans-fp-math"="true" {
; GFX10-LABEL: test_fmed3_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -119,7 +119,7 @@ define float @test_fmed3_global_nnan(float %a) #3 {
; ------------------------------------------------------------------------------
; ieee=false requires known never NaN input
-define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
+define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -142,7 +142,7 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
}
; ieee=true input is known non-SNaN but dx10_clamp=false
-define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
+define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false" {
; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -168,7 +168,7 @@ define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
}
; ieee=true dx10_clamp=true but input may be SNaN, clamp requires third operand in fmed3 to be 0.0
-define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
+define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true" {
; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -192,9 +192,3 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
declare half @llvm.amdgcn.fmed3.f16(half, half, half)
declare float @llvm.amdgcn.fmed3.f32(float, float, float)
declare float @llvm.minnum.f32(float, float)
-
-attributes #0 = {"amdgpu-ieee"="true"}
-attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
-attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index ca0047bba6c4bd..0a8bf2553f02e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define float @test_min_max_ValK0_K1_f32(float %a) #0 {
+define float @test_min_max_ValK0_K1_f32(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -24,7 +24,7 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
ret float %fmed
}
-define double @test_min_max_K0Val_K1_f64(double %a) #1 {
+define double @test_min_max_K0Val_K1_f64(double %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_max_K0Val_K1_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@ define double @test_min_max_K0Val_K1_f64(double %a) #1 {
}
; min-max patterns for ieee=true, dx10_clamp=true don't have to check for NaNs
-define half @test_min_K1max_ValK0_f16(half %a) #2 {
+define half @test_min_K1max_ValK0_f16(half %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true" {
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,7 +69,7 @@ define half @test_min_K1max_ValK0_f16(half %a) #2 {
ret half %fmed
}
-define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
+define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -91,7 +91,7 @@ define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
ret <2 x half> %fmed
}
-define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
+define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true" {
; GFX10-LABEL: test_min_max_splat_padded_with_undef:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -115,7 +115,7 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
; max-mix patterns work only for known non-NaN inputs
-define float @test_max_min_ValK1_K0_f32(float %a) #0 {
+define float @test_max_min_ValK1_K0_f32(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -137,7 +137,7 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
ret float %fmed
}
-define double @test_max_min_K1Val_K0_f64(double %a) #1 {
+define double @test_max_min_K1Val_K0_f64(double %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_min_K1Val_K0_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -159,7 +159,7 @@ define double @test_max_min_K1Val_K0_f64(double %a) #1 {
ret double %fmed
}
-define half @test_max_K0min_ValK1_f16(half %a) #0 {
+define half @test_max_K0min_ValK1_f16(half %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -182,7 +182,7 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
}
; treat undef as value that will result in a constant splat
-define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
+define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_K0min_K1Val_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -206,7 +206,7 @@ define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
; global nnan function attribute always forces clamp combine
-define float @test_min_max_global_nnan(float %a) #3 {
+define float @test_min_max_global_nnan(float %a) "no-nans-fp-math"="true" {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -227,7 +227,7 @@ define float @test_min_max_global_nnan(float %a) #3 {
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #3 {
+define float @test_max_min_global_nnan(float %a) "no-nans-fp-math"="true" {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -253,7 +253,7 @@ define float @test_max_min_global_nnan(float %a) #3 {
; ------------------------------------------------------------------------------
; min(max(Val, 1.0), 0.0), should be min(max(Val, 0.0), 1.0)
-define float @test_min_max_K0_gt_K1(float %a) #0 {
+define float @test_min_max_K0_gt_K1(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -276,7 +276,7 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
}
; max(min(Val, 0.0), 1.0), should be max(min(Val, 1.0), 0.0)
-define float @test_max_min_K0_gt_K1(float %a) #0 {
+define float @test_max_min_K0_gt_K1(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -301,7 +301,7 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
; Input that can be NaN
; min-max patterns for ieee=false require known non-NaN input
-define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
+define float @test_min_max_maybe_NaN_input_ieee_false(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -326,7 +326,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
}
; clamp fails here since input can be NaN and dx10_clamp=false; fmed3 succeds
-define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #4 {
+define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) "amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false" {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -351,7 +351,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #
; max-min patterns always require known non-NaN input
-define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
+define float @test_max_min_maybe_NaN_input_ieee_true(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -377,7 +377,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
ret float %fmed
}
-define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
+define float @test_max_min_maybe_NaN_input_ieee_false(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -411,8 +411,3 @@ declare double @llvm.minnum.f64(double, double)
declare double @llvm.maxnum.f64(double, double)
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
-attributes #0 = {"amdgpu-ieee"="true"}
-attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
-attributes #3 = {"no-nans-fp-math"="true"}
-attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index b9846a6a555dce..01ce686ec9aa3f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -441,9 +441,7 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
ret <4 x float> %d
}
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-
-attributes #0 = { nounwind readnone }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
index 362e25fa932fc0..c647f69159b78f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
@@ -1035,10 +1035,9 @@ define <4 x double> @test_f64_add_mul_rhs(<4 x double> %a, <4 x double> %b, <4 x
ret <4 x double> %z
}
-declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) #0
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-declare double @llvm.fmuladd.f64(double, double, double) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-attributes #0 = { nounwind readnone }
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) nounwind readnone
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
index b60f4c12502644..228f5a3728cf44 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
@@ -254,10 +254,7 @@ define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1
ret float %result
}
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
-declare i1 @llvm.amdgcn.class.f32(float, i32) #1
-
-attributes #0 = { nounwind readnone speculatable willreturn }
-attributes #1 = { nounwind readnone speculatable }
+declare float @llvm.fma.f32(float, float, float) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone speculatable
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) nounwind readnone speculatable
+declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
index 13828c2d8a6a0b..33ebaa35ae20a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -stop-after=irtranslator -o - %s | FileCheck %s
-declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() #0
-declare align(8) ptr @unknown_decl() #0
-declare align(8) dereferenceable(4) ptr @declared_with_ret_deref4() #0
-declare align(8) dereferenceable_or_null(8) ptr @declared_with_ret_deref_or_null() #0
-declare align(8) nonnull ptr @nonnull_decl() #0
-declare align(8) dereferenceable_or_null(4) ptr @declared_with_ret_deref_or_null4() #0
+declare align(8) dereferenceable(8) ptr @declared_with_ret_deref() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare align(8) ptr @unknown_decl() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare align(8) dereferenceable(4) ptr @declared_with_ret_deref4() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare align(8) dereferenceable_or_null(8) ptr @declared_with_ret_deref_or_null() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare align(8) nonnull ptr @nonnull_decl() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare align(8) dereferenceable_or_null(4) ptr @declared_with_ret_deref_or_null4() "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; Should have dereferenceable on mem operand
define i64 @load_deref_declaration_only() {
@@ -231,5 +231,3 @@ define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
%add = add i64 %load0, %load1
ret i64 %add
}
-
-attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index d4d5cb18bbd30e..31530934dfde24 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -223,7 +223,7 @@ exit:
; There is a divergent, according to machine uniformity info, g_brcond branch
; here, not lowered to si_if because of "amdgpu-flat-work-group-size"="1,1".
-define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 x i32> inreg %.WorkgroupId, <3 x i32> %.LocalInvocationId) #0 {
+define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3 x i32> inreg %.WorkgroupId, <3 x i32> %.LocalInvocationId) nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1,1" {
; GFX10-LABEL: single_lane_execution_attribute:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_getpc_b64 s[4:5]
@@ -338,5 +338,3 @@ declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg)
-
-attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
index 9b0bd2752b8231..2363a77ac6ac26 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir
@@ -6,9 +6,7 @@
define void @divergent_i1_phi_uniform_branch_simple() {ret void}
define void @divergent_i1_phi_used_inside_loop() {ret void}
define void @divergent_i1_phi_used_inside_loop_bigger_loop_body() {ret void}
- define void @_amdgpu_cs_main() #0 {ret void}
-
- attributes #0 = {"amdgpu-flat-work-group-size"="1,1"}
+ define void @_amdgpu_cs_main() "amdgpu-flat-work-group-size"="1,1" {ret void}
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
index 56bd7ddde6f527..822b3a521a35de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
@@ -72,9 +72,7 @@ entry:
ret void
}
-declare ptr @llvm.returnaddress(i32 immarg) #0
-
-attributes #0 = { nofree nosync nounwind readnone willreturn }
+declare ptr @llvm.returnaddress(i32 immarg) nofree nosync nounwind readnone willreturn
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !10}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
index 5dae7885f6bfb1..ef2298e29a529a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
@@ -23,6 +23,4 @@ define void @func_dynamic_stackalloc_vgpr_align4(i32 %n) {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 146f344930a4ee..41cb7a86c03b89 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -202,7 +202,7 @@ define float @v_fdiv_f32(float %a, float %b) {
}
-define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #0 {
+define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-IEEE-FASTFMA-LABEL: v_fdiv_f32_dynamic_denorm:
; GFX6-IEEE-FASTFMA: ; %bb.0:
; GFX6-IEEE-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -521,7 +521,7 @@ define float @v_fdiv_f32_ulp25(float %a, float %b) {
ret float %fdiv
}
-define float @v_fdiv_f32_dynamic_25ulp(float %x, float %y) #0 {
+define float @v_fdiv_f32_dynamic_25ulp(float %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2543,7 +2543,7 @@ define <2 x float> @v_fdiv_v2f32_arcp_afn_ulp25(<2 x float> %a, <2 x float> %b)
}
-define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
+define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2815,7 +2815,7 @@ define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #0 {
ret float %div
}
-define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z) #0 {
+define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2932,7 +2932,7 @@ define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z)
ret float %div
}
-define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 {
+define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3058,7 +3058,7 @@ define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, fl
}
-define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
+define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3343,7 +3343,7 @@ define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #0 {
ret float %div
}
-define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) #0 {
+define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3462,7 +3462,7 @@ define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) #0 {
}
-define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
+define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3748,7 +3748,7 @@ define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #0 {
}
-define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
+define float @v_fdiv_f32_constrhs0_dynamic(float %x) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4030,7 +4030,7 @@ define float @v_fdiv_f32_constrhs0_dynamic(float %x) #0 {
ret float %div
}
-define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
+define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4127,7 +4127,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
}
-define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
+define float @v_fdiv_f32_constlhs0_dynamic(float %x) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4409,7 +4409,7 @@ define float @v_fdiv_f32_constlhs0_dynamic(float %x) #0 {
ret float %div
}
-define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) #0 {
+define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4510,7 +4510,7 @@ define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) #0 {
}
-define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #0 {
+define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4782,7 +4782,7 @@ define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #
ret float %div
}
-define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #0 {
+define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4899,7 +4899,7 @@ define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float
ret float %div
}
-define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
+define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5171,7 +5171,7 @@ define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #
ret float %div
}
-define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
+define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5290,7 +5290,5 @@ define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub)
!0 = !{float 2.500000e+00}
-attributes #0 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN-IEEE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index aa9ebb9226cddc..4c6c232acf9f39 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -57,7 +57,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
ret float %ret
}
-define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 {
+define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
; GFX940: bb.1 (%ir-block.0):
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -83,7 +83,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
ret void
}
-define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 {
+define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
; GFX940: bb.1 (%ir-block.0):
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -112,5 +112,3 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data
}
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index 68d8e3d747b869..ef0db6ff2616cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -42,7 +42,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %da
ret double %ret
}
-define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 {
+define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -59,7 +59,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
ret void
}
-define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 {
+define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -83,5 +83,3 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
}
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
index 34635b077cd92d..7c68222b9a3d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
@@ -70,7 +70,7 @@ define double @v_floor_f64_ieee_fneg(double %x) {
ret double %result
}
-define double @v_floor_f64_nonieee(double %x) #1 {
+define double @v_floor_f64_nonieee(double %x) "amdgpu-ieee"="false" {
; GFX6-LABEL: v_floor_f64_nonieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -93,7 +93,7 @@ define double @v_floor_f64_nonieee(double %x) #1 {
ret double %result
}
-define double @v_floor_f64_nonieee_nnan(double %x) #1 {
+define double @v_floor_f64_nonieee_nnan(double %x) "amdgpu-ieee"="false" {
; GFX6-LABEL: v_floor_f64_nonieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -113,7 +113,7 @@ define double @v_floor_f64_nonieee_nnan(double %x) #1 {
ret double %result
}
-define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
+define double @v_floor_f64_non_ieee_fneg(double %x) "amdgpu-ieee"="false" {
; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -286,8 +286,5 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
ret <2 x float> %cast
}
-declare double @llvm.floor.f64(double) #0
-declare double @llvm.fabs.f64(double) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
-attributes #1 = { "amdgpu-ieee"="false" }
+declare double @llvm.floor.f64(double) nounwind readnone speculatable willreturn
+declare double @llvm.fabs.f64(double) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 99e6c5d06a0e19..cc7537e9421924 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -1019,18 +1019,16 @@ define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x,
ret float %fma
}
-declare half @llvm.fma.f16(half, half, half) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare double @llvm.fma.f64(double, double, double) #0
+declare half @llvm.fma.f16(half, half, half) nounwind readnone speculatable willreturn
+declare float @llvm.fma.f32(float, float, float) nounwind readnone speculatable willreturn
+declare double @llvm.fma.f64(double, double, double) nounwind readnone speculatable willreturn
-declare half @llvm.fabs.f16(half) #0
-declare float @llvm.fabs.f32(float) #0
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) nounwind readnone speculatable willreturn
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone speculatable willreturn
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone speculatable willreturn
-declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #0
-declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) nounwind readnone speculatable willreturn
+declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index 75c4cd53e3bfc7..3c870a6da25416 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -3,7 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define float @test_min_max_ValK0_K1_f32(float %a) #0 {
+define float @test_min_max_ValK0_K1_f32(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_ValK0_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -30,7 +30,7 @@ define float @test_min_max_ValK0_K1_f32(float %a) #0 {
ret float %fmed
}
-define float @test_min_max_K0Val_K1_f32(float %a) #1 {
+define float @test_min_max_K0Val_K1_f32(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_max_K0Val_K1_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,7 +59,7 @@ define float @test_min_max_K0Val_K1_f32(float %a) #1 {
; min-max patterns for ieee=true do not have to check for NaNs
; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
-define half @test_min_K1max_ValK0_f16(half %a) #0 {
+define half @test_min_K1max_ValK0_f16(half %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_K1max_ValK0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -91,7 +91,7 @@ define half @test_min_K1max_ValK0_f16(half %a) #0 {
ret half %fmed
}
-define half @test_min_K1max_K0Val_f16(half %a) #1 {
+define half @test_min_K1max_K0Val_f16(half %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_K1max_K0Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,7 +120,7 @@ define half @test_min_K1max_K0Val_f16(half %a) #1 {
}
; max-mix patterns work only for non-NaN inputs
-define float @test_max_min_ValK1_K0_f32(float %a) #0 {
+define float @test_max_min_ValK1_K0_f32(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_ValK1_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -147,7 +147,7 @@ define float @test_max_min_ValK1_K0_f32(float %a) #0 {
ret float %fmed
}
-define float @test_max_min_K1Val_K0_f32(float %a) #1 {
+define float @test_max_min_K1Val_K0_f32(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_min_K1Val_K0_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,7 +174,7 @@ define float @test_max_min_K1Val_K0_f32(float %a) #1 {
ret float %fmed
}
-define half @test_max_K0min_ValK1_f16(half %a) #0 {
+define half @test_max_K0min_ValK1_f16(half %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_K0min_ValK1_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -202,7 +202,7 @@ define half @test_max_K0min_ValK1_f16(half %a) #0 {
ret half %fmed
}
-define half @test_max_K0min_K1Val_f16(half %a) #1 {
+define half @test_max_K0min_K1Val_f16(half %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_K0min_K1Val_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -232,7 +232,7 @@ define half @test_max_K0min_K1Val_f16(half %a) #1 {
; global nnan function attribute always forces fmed3 combine
-define float @test_min_max_global_nnan(float %a) #2 {
+define float @test_min_max_global_nnan(float %a) "no-nans-fp-math"="true" {
; GFX10-LABEL: test_min_max_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -259,7 +259,7 @@ define float @test_min_max_global_nnan(float %a) #2 {
ret float %fmed
}
-define float @test_max_min_global_nnan(float %a) #2 {
+define float @test_max_min_global_nnan(float %a) "no-nans-fp-math"="true" {
; GFX10-LABEL: test_max_min_global_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -291,7 +291,7 @@ define float @test_max_min_global_nnan(float %a) #2 {
; ------------------------------------------------------------------------------
; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
-define float @test_min_max_K0_gt_K1(float %a) #0 {
+define float @test_min_max_K0_gt_K1(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -321,7 +321,7 @@ define float @test_min_max_K0_gt_K1(float %a) #0 {
}
; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
-define float @test_max_min_K0_gt_K1(float %a) #0 {
+define float @test_max_min_K0_gt_K1(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_K0_gt_K1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -351,7 +351,7 @@ define float @test_max_min_K0_gt_K1(float %a) #0 {
}
; non-inline constant
-define float @test_min_max_non_inline_const(float %a) #0 {
+define float @test_min_max_non_inline_const(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_non_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -382,7 +382,7 @@ define float @test_min_max_non_inline_const(float %a) #0 {
; there is no fmed3 for f64 or v2f16 types
-define double @test_min_max_f64(double %a) #0 {
+define double @test_min_max_f64(double %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -413,7 +413,7 @@ define double @test_min_max_f64(double %a) #0 {
ret double %fmed
}
-define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
+define <2 x half> @test_min_max_v2f16(<2 x half> %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_min_max_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -452,7 +452,7 @@ define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 {
; input that can be NaN
; min-max patterns for ieee=false require known non-NaN input
-define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
+define float @test_min_max_maybe_NaN_input_ieee_false(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -485,7 +485,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
; max-min patterns always require known non-NaN input
-define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
+define float @test_max_min_maybe_NaN_input_ieee_false(float %a) "amdgpu-ieee"="false" {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -517,7 +517,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
}
; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
-define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
+define float @test_max_min_maybe_NaN_input_ieee_true(float %a) "amdgpu-ieee"="true" {
; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -558,6 +558,3 @@ declare double @llvm.minnum.f64(double, double)
declare double @llvm.maxnum.f64(double, double)
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
-attributes #0 = {"amdgpu-ieee"="true"}
-attributes #1 = {"amdgpu-ieee"="false"}
-attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
index fab94875516973..4368968164abd7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) "denormal-fp-math-f32"="ieee,ieee" {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -208,5 +208,3 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
%ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
ret <2 x i16> %ret
}
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 255c6dedbd6e1e..839b01e4f5313f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1087,7 +1087,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1129,7 +1129,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1158,7 +1158,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1200,7 +1200,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1248,7 +1248,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1288,7 +1288,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1313,7 +1313,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1431,7 +1431,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1472,7 +1472,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1501,7 +1501,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1543,7 +1543,7 @@ main_body:
ret void
}
-define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1583,7 +1583,7 @@ main_body:
ret double %ret
}
-define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1608,7 +1608,7 @@ main_body:
ret double %ret
}
-define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1863,7 +1863,7 @@ main_body:
ret double %ret
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1889,7 +1889,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1915,7 +1915,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1962,7 +1962,7 @@ main_body:
ret void
}
-define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 {
+define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1984,7 +1984,7 @@ main_body:
ret double %ret
}
-define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 {
+define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2007,7 +2007,7 @@ main_body:
ret double %ret
}
-define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 {
+define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) "denormal-fp-math"="ieee,ieee" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2029,9 +2029,3 @@ main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
}
-
-attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #3 = { "denormal-fp-math"="ieee,ieee" }
-attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
index 05cdb54f5dd747..fc132377f3e548 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck --check-prefix=CI %s
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -64,7 +64,7 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
ret void
}
-define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: fast_frem_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -110,7 +110,7 @@ define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 {
+define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: unsafe_frem_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -156,7 +156,7 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -222,7 +222,7 @@ define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1
ret void
}
-define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: fast_frem_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -266,7 +266,7 @@ define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 {
+define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: unsafe_frem_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -310,7 +310,7 @@ define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -373,7 +373,7 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
ret void
}
-define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: fast_frem_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -480,7 +480,7 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
; VI-NEXT: v_mov_b32_e32 v3, s5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
- ptr addrspace(1) %in2) #1 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%r0 = load double, ptr addrspace(1) %in1, align 8
%r1 = load double, ptr addrspace(1) %in2, align 8
%r2 = frem double %r0, %r1
@@ -488,7 +488,7 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -585,7 +585,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_v4f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -744,7 +744,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_v2f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -842,7 +842,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1004,7 +1004,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; CI-LABEL: frem_v2f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1097,6 +1097,3 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
store <2 x double> %r2, ptr addrspace(1) %out, align 16
ret void
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index f9b98059be0b3a..f2b2c6f4446851 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -8051,35 +8051,33 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
ret <2 x i128> %result
}
-declare i7 @llvm.fshl.i7(i7, i7, i7) #0
-declare i8 @llvm.fshl.i8(i8, i8, i8) #0
-declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
+declare i7 @llvm.fshl.i7(i7, i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
-declare i16 @llvm.fshl.i16(i16, i16, i16) #0
-declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
+declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
-declare i24 @llvm.fshl.i24(i24, i24, i24) #0
-declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
+declare i24 @llvm.fshl.i24(i24, i24, i24) nounwind readnone speculatable willreturn
+declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) nounwind readnone speculatable willreturn
-declare i32 @llvm.fshl.i32(i32, i32, i32) #0
-declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
+declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
-declare i48 @llvm.fshl.i48(i48, i48, i48) #0
+declare i48 @llvm.fshl.i48(i48, i48, i48) nounwind readnone speculatable willreturn
-declare i64 @llvm.fshl.i64(i64, i64, i64) #0
-declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
+declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
-declare i128 @llvm.fshl.i128(i128, i128, i128) #0
-declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i128 @llvm.fshl.i128(i128, i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index c8455665e7b40f..789f3d7a2789ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -8120,35 +8120,33 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
ret <2 x i128> %result
}
-declare i7 @llvm.fshr.i7(i7, i7, i7) #0
-declare i8 @llvm.fshr.i8(i8, i8, i8) #0
-declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
+declare i7 @llvm.fshr.i7(i7, i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.fshr.i8(i8, i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
-declare i16 @llvm.fshr.i16(i16, i16, i16) #0
-declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
+declare i16 @llvm.fshr.i16(i16, i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
-declare i24 @llvm.fshr.i24(i24, i24, i24) #0
-declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
+declare i24 @llvm.fshr.i24(i24, i24, i24) nounwind readnone speculatable willreturn
+declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) nounwind readnone speculatable willreturn
-declare i32 @llvm.fshr.i32(i32, i32, i32) #0
-declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
+declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
-declare i48 @llvm.fshr.i48(i48, i48, i48) #0
+declare i48 @llvm.fshr.i48(i48, i48, i48) nounwind readnone speculatable willreturn
-declare i64 @llvm.fshr.i64(i64, i64, i64) #0
-declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
+declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
-declare i128 @llvm.fshr.i128(i128, i128, i128) #0
-declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i128 @llvm.fshr.i128(i128, i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index e6c835fa25406a..945559af591eea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -3,7 +3,7 @@
; FIXME: Also test with a pre-gfx8 target.
-define i1 @i1_func_void() #0 {
+define i1 @i1_func_void() nounwind {
; CHECK-LABEL: name: i1_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -15,7 +15,7 @@ define i1 @i1_func_void() #0 {
ret i1 %val
}
-define zeroext i1 @i1_zeroext_func_void() #0 {
+define zeroext i1 @i1_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i1_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -27,7 +27,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 {
ret i1 %val
}
-define signext i1 @i1_signext_func_void() #0 {
+define signext i1 @i1_signext_func_void() nounwind {
; CHECK-LABEL: name: i1_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -39,7 +39,7 @@ define signext i1 @i1_signext_func_void() #0 {
ret i1 %val
}
-define i7 @i7_func_void() #0 {
+define i7 @i7_func_void() nounwind {
; CHECK-LABEL: name: i7_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -51,7 +51,7 @@ define i7 @i7_func_void() #0 {
ret i7 %val
}
-define zeroext i7 @i7_zeroext_func_void() #0 {
+define zeroext i7 @i7_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i7_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -63,7 +63,7 @@ define zeroext i7 @i7_zeroext_func_void() #0 {
ret i7 %val
}
-define signext i7 @i7_signext_func_void() #0 {
+define signext i7 @i7_signext_func_void() nounwind {
; CHECK-LABEL: name: i7_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -75,7 +75,7 @@ define signext i7 @i7_signext_func_void() #0 {
ret i7 %val
}
-define i8 @i8_func_void() #0 {
+define i8 @i8_func_void() nounwind {
; CHECK-LABEL: name: i8_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -87,7 +87,7 @@ define i8 @i8_func_void() #0 {
ret i8 %val
}
-define zeroext i8 @i8_zeroext_func_void() #0 {
+define zeroext i8 @i8_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i8_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -99,7 +99,7 @@ define zeroext i8 @i8_zeroext_func_void() #0 {
ret i8 %val
}
-define signext i8 @i8_signext_func_void() #0 {
+define signext i8 @i8_signext_func_void() nounwind {
; CHECK-LABEL: name: i8_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -111,7 +111,7 @@ define signext i8 @i8_signext_func_void() #0 {
ret i8 %val
}
-define i16 @i16_func_void() #0 {
+define i16 @i16_func_void() nounwind {
; CHECK-LABEL: name: i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -123,7 +123,7 @@ define i16 @i16_func_void() #0 {
ret i16 %val
}
-define zeroext i16 @i16_zeroext_func_void() #0 {
+define zeroext i16 @i16_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i16_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -135,7 +135,7 @@ define zeroext i16 @i16_zeroext_func_void() #0 {
ret i16 %val
}
-define signext i16 @i16_signext_func_void() #0 {
+define signext i16 @i16_signext_func_void() nounwind {
; CHECK-LABEL: name: i16_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -147,7 +147,7 @@ define signext i16 @i16_signext_func_void() #0 {
ret i16 %val
}
-define half @f16_func_void() #0 {
+define half @f16_func_void() nounwind {
; CHECK-LABEL: name: f16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -159,7 +159,7 @@ define half @f16_func_void() #0 {
ret half %val
}
-define i24 @i24_func_void() #0 {
+define i24 @i24_func_void() nounwind {
; CHECK-LABEL: name: i24_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -171,7 +171,7 @@ define i24 @i24_func_void() #0 {
ret i24 %val
}
-define zeroext i24 @i24_zeroext_func_void() #0 {
+define zeroext i24 @i24_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i24_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -183,7 +183,7 @@ define zeroext i24 @i24_zeroext_func_void() #0 {
ret i24 %val
}
-define signext i24 @i24_signext_func_void() #0 {
+define signext i24 @i24_signext_func_void() nounwind {
; CHECK-LABEL: name: i24_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -195,7 +195,7 @@ define signext i24 @i24_signext_func_void() #0 {
ret i24 %val
}
-define <2 x i24> @v2i24_func_void() #0 {
+define <2 x i24> @v2i24_func_void() nounwind {
; CHECK-LABEL: name: v2i24_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -210,7 +210,7 @@ define <2 x i24> @v2i24_func_void() #0 {
ret <2 x i24> %val
}
-define <3 x i24> @v3i24_func_void() #0 {
+define <3 x i24> @v3i24_func_void() nounwind {
; CHECK-LABEL: name: v3i24_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -227,7 +227,7 @@ define <3 x i24> @v3i24_func_void() #0 {
ret <3 x i24> %val
}
-define i32 @i32_func_void() #0 {
+define i32 @i32_func_void() nounwind {
; CHECK-LABEL: name: i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -238,7 +238,7 @@ define i32 @i32_func_void() #0 {
ret i32 %val
}
-define i48 @i48_func_void() #0 {
+define i48 @i48_func_void() nounwind {
; CHECK-LABEL: name: i48_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -252,7 +252,7 @@ define i48 @i48_func_void() #0 {
ret i48 %val
}
-define signext i48 @i48_signext_func_void() #0 {
+define signext i48 @i48_signext_func_void() nounwind {
; CHECK-LABEL: name: i48_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -266,7 +266,7 @@ define signext i48 @i48_signext_func_void() #0 {
ret i48 %val
}
-define zeroext i48 @i48_zeroext_func_void() #0 {
+define zeroext i48 @i48_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i48_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -280,7 +280,7 @@ define zeroext i48 @i48_zeroext_func_void() #0 {
ret i48 %val
}
-define i64 @i64_func_void() #0 {
+define i64 @i64_func_void() nounwind {
; CHECK-LABEL: name: i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -293,7 +293,7 @@ define i64 @i64_func_void() #0 {
ret i64 %val
}
-define i65 @i65_func_void() #0 {
+define i65 @i65_func_void() nounwind {
; CHECK-LABEL: name: i65_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -308,7 +308,7 @@ define i65 @i65_func_void() #0 {
ret i65 %val
}
-define signext i65 @i65_signext_func_void() #0 {
+define signext i65 @i65_signext_func_void() nounwind {
; CHECK-LABEL: name: i65_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -323,7 +323,7 @@ define signext i65 @i65_signext_func_void() #0 {
ret i65 %val
}
-define zeroext i65 @i65_zeroext_func_void() #0 {
+define zeroext i65 @i65_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i65_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -338,7 +338,7 @@ define zeroext i65 @i65_zeroext_func_void() #0 {
ret i65 %val
}
-define float @f32_func_void() #0 {
+define float @f32_func_void() nounwind {
; CHECK-LABEL: name: f32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -349,7 +349,7 @@ define float @f32_func_void() #0 {
ret float %val
}
-define double @f64_func_void() #0 {
+define double @f64_func_void() nounwind {
; CHECK-LABEL: name: f64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -362,7 +362,7 @@ define double @f64_func_void() #0 {
ret double %val
}
-define <2 x double> @v2f64_func_void() #0 {
+define <2 x double> @v2f64_func_void() nounwind {
; CHECK-LABEL: name: v2f64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -377,7 +377,7 @@ define <2 x double> @v2f64_func_void() #0 {
ret <2 x double> %val
}
-define <2 x i32> @v2i32_func_void() #0 {
+define <2 x i32> @v2i32_func_void() nounwind {
; CHECK-LABEL: name: v2i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -390,7 +390,7 @@ define <2 x i32> @v2i32_func_void() #0 {
ret <2 x i32> %val
}
-define <3 x i32> @v3i32_func_void() #0 {
+define <3 x i32> @v3i32_func_void() nounwind {
; CHECK-LABEL: name: v3i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -404,7 +404,7 @@ define <3 x i32> @v3i32_func_void() #0 {
ret <3 x i32> %val
}
-define <4 x i32> @v4i32_func_void() #0 {
+define <4 x i32> @v4i32_func_void() nounwind {
; CHECK-LABEL: name: v4i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -419,7 +419,7 @@ define <4 x i32> @v4i32_func_void() #0 {
ret <4 x i32> %val
}
-define <5 x i32> @v5i32_func_void() #0 {
+define <5 x i32> @v5i32_func_void() nounwind {
; CHECK-LABEL: name: v5i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -435,7 +435,7 @@ define <5 x i32> @v5i32_func_void() #0 {
ret <5 x i32> %val
}
-define <8 x i32> @v8i32_func_void() #0 {
+define <8 x i32> @v8i32_func_void() nounwind {
; CHECK-LABEL: name: v8i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -456,7 +456,7 @@ define <8 x i32> @v8i32_func_void() #0 {
ret <8 x i32> %val
}
-define <16 x i32> @v16i32_func_void() #0 {
+define <16 x i32> @v16i32_func_void() nounwind {
; CHECK-LABEL: name: v16i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -485,7 +485,7 @@ define <16 x i32> @v16i32_func_void() #0 {
ret <16 x i32> %val
}
-define <32 x i32> @v32i32_func_void() #0 {
+define <32 x i32> @v32i32_func_void() nounwind {
; CHECK-LABEL: name: v32i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -530,7 +530,7 @@ define <32 x i32> @v32i32_func_void() #0 {
ret <32 x i32> %val
}
-define <2 x i64> @v2i64_func_void() #0 {
+define <2 x i64> @v2i64_func_void() nounwind {
; CHECK-LABEL: name: v2i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -545,7 +545,7 @@ define <2 x i64> @v2i64_func_void() #0 {
ret <2 x i64> %val
}
-define <3 x i64> @v3i64_func_void() #0 {
+define <3 x i64> @v3i64_func_void() nounwind {
; CHECK-LABEL: name: v3i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -564,7 +564,7 @@ define <3 x i64> @v3i64_func_void() #0 {
ret <3 x i64> %val
}
-define <4 x i64> @v4i64_func_void() #0 {
+define <4 x i64> @v4i64_func_void() nounwind {
; CHECK-LABEL: name: v4i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -585,7 +585,7 @@ define <4 x i64> @v4i64_func_void() #0 {
ret <4 x i64> %val
}
-define <5 x i64> @v5i64_func_void() #0 {
+define <5 x i64> @v5i64_func_void() nounwind {
; CHECK-LABEL: name: v5i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -608,7 +608,7 @@ define <5 x i64> @v5i64_func_void() #0 {
ret <5 x i64> %val
}
-define <8 x i64> @v8i64_func_void() #0 {
+define <8 x i64> @v8i64_func_void() nounwind {
; CHECK-LABEL: name: v8i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -637,7 +637,7 @@ define <8 x i64> @v8i64_func_void() #0 {
ret <8 x i64> %val
}
-define <16 x i64> @v16i64_func_void() #0 {
+define <16 x i64> @v16i64_func_void() nounwind {
; CHECK-LABEL: name: v16i64_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -682,7 +682,7 @@ define <16 x i64> @v16i64_func_void() #0 {
ret <16 x i64> %val
}
-define <2 x i16> @v2i16_func_void() #0 {
+define <2 x i16> @v2i16_func_void() nounwind {
; CHECK-LABEL: name: v2i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -693,7 +693,7 @@ define <2 x i16> @v2i16_func_void() #0 {
ret <2 x i16> %val
}
-define <2 x half> @v2f16_func_void() #0 {
+define <2 x half> @v2f16_func_void() nounwind {
; CHECK-LABEL: name: v2f16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -704,7 +704,7 @@ define <2 x half> @v2f16_func_void() #0 {
ret <2 x half> %val
}
-define <3 x i16> @v3i16_func_void() #0 {
+define <3 x i16> @v3i16_func_void() nounwind {
; CHECK-LABEL: name: v3i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -720,7 +720,7 @@ define <3 x i16> @v3i16_func_void() #0 {
ret <3 x i16> %val
}
-define <4 x i16> @v4i16_func_void() #0 {
+define <4 x i16> @v4i16_func_void() nounwind {
; CHECK-LABEL: name: v4i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -733,7 +733,7 @@ define <4 x i16> @v4i16_func_void() #0 {
ret <4 x i16> %val
}
-define <4 x half> @v4f16_func_void() #0 {
+define <4 x half> @v4f16_func_void() nounwind {
; CHECK-LABEL: name: v4f16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -746,7 +746,7 @@ define <4 x half> @v4f16_func_void() #0 {
ret <4 x half> %val
}
-define <5 x i16> @v5i16_func_void() #0 {
+define <5 x i16> @v5i16_func_void() nounwind {
; CHECK-LABEL: name: v5i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -765,7 +765,7 @@ define <5 x i16> @v5i16_func_void() #0 {
ret <5 x i16> %val
}
-define <8 x i16> @v8i16_func_void() #0 {
+define <8 x i16> @v8i16_func_void() nounwind {
; CHECK-LABEL: name: v8i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -782,7 +782,7 @@ define <8 x i16> @v8i16_func_void() #0 {
ret <8 x i16> %val
}
-define <16 x i16> @v16i16_func_void() #0 {
+define <16 x i16> @v16i16_func_void() nounwind {
; CHECK-LABEL: name: v16i16_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -803,7 +803,7 @@ define <16 x i16> @v16i16_func_void() #0 {
ret <16 x i16> %val
}
-define <16 x i8> @v16i8_func_void() #0 {
+define <16 x i8> @v16i8_func_void() nounwind {
; CHECK-LABEL: name: v16i8_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -864,7 +864,7 @@ define <16 x i8> @v16i8_func_void() #0 {
ret <16 x i8> %val
}
-define <2 x i8> @v2i8_func_void() #0 {
+define <2 x i8> @v2i8_func_void() nounwind {
; CHECK-LABEL: name: v2i8_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -881,7 +881,7 @@ define <2 x i8> @v2i8_func_void() #0 {
ret <2 x i8> %val
}
-define <3 x i8> @v3i8_func_void() #0 {
+define <3 x i8> @v3i8_func_void() nounwind {
; CHECK-LABEL: name: v3i8_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -901,7 +901,7 @@ define <3 x i8> @v3i8_func_void() #0 {
ret <3 x i8> %val
}
-define <4 x i8> @v4i8_func_void() #0 {
+define <4 x i8> @v4i8_func_void() nounwind {
; CHECK-LABEL: name: v4i8_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -926,7 +926,7 @@ define <4 x i8> @v4i8_func_void() #0 {
ret <4 x i8> %val
}
-define {i8, i32} @struct_i8_i32_func_void() #0 {
+define {i8, i32} @struct_i8_i32_func_void() nounwind {
; CHECK-LABEL: name: struct_i8_i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -942,7 +942,7 @@ define {i8, i32} @struct_i8_i32_func_void() #0 {
ret { i8, i32 } %val
}
-define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 {
+define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) nounwind {
; CHECK-LABEL: name: void_func_sret_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -969,7 +969,7 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.
-define <33 x i32> @v33i32_func_void() #0 {
+define <33 x i32> @v33i32_func_void() nounwind {
; CHECK-LABEL: name: v33i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -985,7 +985,7 @@ define <33 x i32> @v33i32_func_void() #0 {
ret <33 x i32> %val
}
-define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 {
+define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) nounwind {
; CHECK-LABEL: name: v33i32_func_v33i32_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1008,7 +1008,7 @@ define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 {
ret <33 x i32> %val
}
-define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
+define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() nounwind {
; CHECK-LABEL: name: struct_v32i32_i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1030,7 +1030,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
ret { <32 x i32>, i32 }%val
}
-define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
+define { i32, <32 x i32> } @struct_i32_v32i32_func_void() nounwind {
; CHECK-LABEL: name: struct_i32_v32i32_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1053,7 +1053,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
}
; Make sure the last struct component is returned in v3, not v4.
-define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
+define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() nounwind {
; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
@@ -1088,7 +1088,7 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
ret { <3 x i32>, i32 } %insert.4
}
-define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
+define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() nounwind {
; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
@@ -1123,7 +1123,7 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
ret { <3 x float>, i32 } %insert.4
}
-define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
+define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) nounwind {
; CHECK-LABEL: name: void_func_sret_max_known_zero_bits
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1153,7 +1153,7 @@ define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0)
ret void
}
-define i1022 @i1022_func_void() #0 {
+define i1022 @i1022_func_void() nounwind {
; CHECK-LABEL: name: i1022_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -1197,7 +1197,7 @@ define i1022 @i1022_func_void() #0 {
ret i1022 %val
}
-define signext i1022 @i1022_signext_func_void() #0 {
+define signext i1022 @i1022_signext_func_void() nounwind {
; CHECK-LABEL: name: i1022_signext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -1241,7 +1241,7 @@ define signext i1022 @i1022_signext_func_void() #0 {
ret i1022 %val
}
-define zeroext i1022 @i1022_zeroext_func_void() #0 {
+define zeroext i1022 @i1022_zeroext_func_void() nounwind {
; CHECK-LABEL: name: i1022_zeroext_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -1287,7 +1287,7 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 {
%struct.with.ptrs = type { <32 x i32>, ptr addrspace(3), ptr addrspace(1), <2 x ptr addrspace(1)> }
-define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
+define %struct.with.ptrs @ptr_in_struct_func_void() nounwind {
; CHECK-LABEL: name: ptr_in_struct_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1318,5 +1318,3 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
%val = load volatile %struct.with.ptrs, ptr addrspace(1) undef
ret %struct.with.ptrs %val
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index 21832dc320e425..5c0c2da4d8ced8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -113,7 +113,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr ad
ret void
}
-define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
; GFX908_GFX11: bb.1 (%ir-block.0):
; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -139,7 +139,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
ret void
}
-define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
@@ -220,5 +220,3 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index e48d281f37c9aa..3cd322e3f2f5d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -120,7 +120,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addr
ret float %ret
}
-define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 {
+define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -148,7 +148,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p
ret float %ret
}
-define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
+define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
; GFX11: bb.1 (%ir-block.0):
; GFX11-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
@@ -241,5 +241,3 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index b058ad1023e130..711a5df8155bdf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -166,7 +166,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr add
ret double %ret
}
-define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -183,7 +183,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1)
ret void
}
-define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
+define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -206,7 +206,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %
ret double %ret
}
-define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
@@ -224,7 +224,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
ret void
}
-define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
+define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX90A_GFX940: bb.1 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
@@ -250,5 +250,3 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index 10cbc56cc5fbea..8b6461dc263bd2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -198,6 +198,4 @@ bb:
ret <4 x float> %ret
}
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly willreturn }
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
index a1c99f5cf60297..d1a088337bcf1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll
@@ -4,7 +4,7 @@
; Check lowering of some large insertelement that use the stack
; instead of register indexing.
-define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr addrspace(1) %ptr, i32 %val, i32 %idx) #0 {
+define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr addrspace(1) %ptr, i32 %val, i32 %idx) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" {
; GCN-LABEL: v_insert_v64i32_varidx:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[20:23], s[4:5], 0x0
@@ -256,5 +256,3 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
store <64 x i32> %insert, ptr addrspace(1) %out.ptr
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
index 3abc21f812e145..72afba8ff28276 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
@@ -3,7 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) #0 {
+define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" {
; GCN-LABEL: v_insert_v64i32_37:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -158,7 +158,4 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }
-attributes #1 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
index e9292f4e34dcda..d1bfcdfc764e64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
@@ -119,8 +119,6 @@ define amdgpu_kernel void @workgroup_id_xz() {
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
index e98f298adf58e6..0811ade785284d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=irtranslator %s -o - | FileCheck %s
; Check that we correctly skip over disabled inputs
-define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
+define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) "InitialPSInputAddr"="0x00002" {
; CHECK-LABEL: name: disabled_input
; CHECK: bb.1.main_body:
; CHECK-NEXT: liveins: $sgpr2, $vgpr0
@@ -13,11 +13,11 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
main_body:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) nounwind
ret void
}
-define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 {
+define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) "InitialPSInputAddr"="0x00002" {
; CHECK-LABEL: name: disabled_input_struct
; CHECK: bb.1.main_body:
; CHECK-NEXT: liveins: $sgpr2, $vgpr0
@@ -29,7 +29,7 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float }
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
main_body:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) nounwind
ret void
}
@@ -152,7 +152,4 @@ define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
ret <2 x i16> %vgpr
}
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { "InitialPSInputAddr"="0x00002" }
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
index 875b725a3c76c8..02f15da560676a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -10,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) nounwind
ret void
}
@@ -23,7 +23,7 @@ define amdgpu_vs void @test_f32(float %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) nounwind
ret void
}
@@ -55,7 +55,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
%tmp0 = load volatile i32, ptr addrspace(4) %arg1
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) nounwind
ret void
}
@@ -70,7 +70,7 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY2]](s32), [[COPY]](s32), [[COPY3]](s32), [[COPY1]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) nounwind
ret void
}
@@ -103,6 +103,4 @@ define amdgpu_vs i32 @non_void_ret() {
ret i32 0
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index a5f59b15c11b84..39b193c646c9c7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -4,11 +4,11 @@
; Workitem IDs are passed to the kernel differently for gfx908
-declare hidden void @external_void_func_void() #0
-declare hidden void @external_void_func_i32(i32) #0
-declare hidden void @external_void_func_v32i32(<32 x i32>) #0
+declare hidden void @external_void_func_void() nounwind
+declare hidden void @external_void_func_i32(i32) nounwind
+declare hidden void @external_void_func_v32i32(<32 x i32>) nounwind
-define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) nounwind {
; GFX900-LABEL: name: test_call_external_void_func_i32
; GFX900: bb.1 (%ir-block.1):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -116,7 +116,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
ret void
}
-define void @test_func_call_external_void_func_i32() #0 {
+define void @test_func_call_external_void_func_i32() nounwind {
; GFX900-LABEL: name: test_func_call_external_void_func_i32
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -204,7 +204,7 @@ define void @test_func_call_external_void_func_i32() #0 {
; Explicit argument is split between registers ad the stack due to v31
; being used for workitem IDs.
-define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) nounwind {
; GFX900-LABEL: name: test_call_external_void_func_v32i32
; GFX900: bb.1 (%ir-block.1):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -384,7 +384,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
ret void
}
-define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
+define void @test_func_call_external_void_func_v32i32([17 x i8]) nounwind {
; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
; GFX900: bb.1 (%ir-block.1):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -644,7 +644,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @test_only_workitem_id_x() nounwind !reqd_work_group_size !0 {
; GFX900-LABEL: name: test_only_workitem_id_x
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -730,7 +730,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1 {
+define amdgpu_kernel void @test_only_workitem_id_y() nounwind !reqd_work_group_size !1 {
; GFX900-LABEL: name: test_only_workitem_id_y
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -824,7 +824,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2 {
+define amdgpu_kernel void @test_only_workitem_id_z() nounwind !reqd_work_group_size !2 {
; GFX900-LABEL: name: test_only_workitem_id_z
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -918,7 +918,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !3 {
+define amdgpu_kernel void @test_only_workitem_id_xy() nounwind !reqd_work_group_size !3 {
; GFX900-LABEL: name: test_only_workitem_id_xy
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1014,7 +1014,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !4 {
+define amdgpu_kernel void @test_only_workitem_id_yz() nounwind !reqd_work_group_size !4 {
; GFX900-LABEL: name: test_only_workitem_id_yz
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1118,7 +1118,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
ret void
}
-define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !5 {
+define amdgpu_kernel void @test_only_workitem_id_xz() nounwind !reqd_work_group_size !5 {
; GFX900-LABEL: name: test_only_workitem_id_xz
; GFX900: bb.1 (%ir-block.0):
; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1214,12 +1214,9 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.z() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable willreturn
!llvm.module.flags = !{!6}
!0 = !{i32 64, i32 1, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index fad833c0a6ad5e..f79118c77353b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -2,13 +2,13 @@
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
; amdgpu_gfx calling convention
-declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) nounwind
-define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
@@ -22,7 +22,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $vgpr0
@@ -41,7 +41,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr4
@@ -60,7 +60,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -86,7 +86,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -111,7 +111,3 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 37f2118572d84e..35e165b1555fba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -1,73 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i1 @external_i1_func_void() #0
-declare zeroext i1 @external_i1_zeroext_func_void() #0
-declare signext i1 @external_i1_signext_func_void() #0
-
-declare i8 @external_i8_func_void() #0
-declare zeroext i8 @external_i8_zeroext_func_void() #0
-declare signext i8 @external_i8_signext_func_void() #0
-
-declare i16 @external_i16_func_void() #0
-declare <2 x i16> @external_v2i16_func_void() #0
-declare <3 x i16> @external_v3i16_func_void() #0
-declare <4 x i16> @external_v4i16_func_void() #0
-declare zeroext i16 @external_i16_zeroext_func_void() #0
-declare signext i16 @external_i16_signext_func_void() #0
-
-declare i48 @external_i48_func_void() #0
-declare zeroext i48 @external_i48_zeroext_func_void() #0
-declare signext i48 @external_i48_signext_func_void() #0
-
-declare i32 @external_i32_func_void() #0
-declare i64 @external_i64_func_void() #0
-declare half @external_f16_func_void() #0
-declare float @external_f32_func_void() #0
-declare double @external_f64_func_void() #0
-
-declare ptr addrspace(1) @external_p1_func_void() #0
-declare <2 x ptr addrspace(1)> @external_v2p1_func_void() #0
-
-declare ptr addrspace(3) @external_p3_func_void() #0
-declare <2 x ptr addrspace(3)> @external_v2p3_func_void() #0
-
-declare <2 x half> @external_v2f16_func_void() #0
-declare <3 x half> @external_v3f16_func_void() #0
-declare <4 x half> @external_v4f16_func_void() #0
-declare <3 x float> @external_v3f32_func_void() #0
-declare <5 x float> @external_v5f32_func_void() #0
-declare <2 x double> @external_v2f64_func_void() #0
-
-declare <2 x i32> @external_v2i32_func_void() #0
-declare <3 x i32> @external_v3i32_func_void() #0
-declare <4 x i32> @external_v4i32_func_void() #0
-declare <5 x i32> @external_v5i32_func_void() #0
-declare <8 x i32> @external_v8i32_func_void() #0
-declare <16 x i32> @external_v16i32_func_void() #0
-declare <32 x i32> @external_v32i32_func_void() #0
-declare <33 x i32> @external_v33i32_func_void() #0
-declare <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1), i32) #0
-declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0
-declare { i32, <32 x i32> } @external_i32_v32i32_func_void() #0
-
-declare { i32, i64 } @external_i32_i64_func_void() #0
-
-declare [2 x i32] @external_a2i32_func_void() #0
-declare [5 x i8] @external_a5i8_func_void() #0
+declare i1 @external_i1_func_void() nounwind
+declare zeroext i1 @external_i1_zeroext_func_void() nounwind
+declare signext i1 @external_i1_signext_func_void() nounwind
+
+declare i8 @external_i8_func_void() nounwind
+declare zeroext i8 @external_i8_zeroext_func_void() nounwind
+declare signext i8 @external_i8_signext_func_void() nounwind
+
+declare i16 @external_i16_func_void() nounwind
+declare <2 x i16> @external_v2i16_func_void() nounwind
+declare <3 x i16> @external_v3i16_func_void() nounwind
+declare <4 x i16> @external_v4i16_func_void() nounwind
+declare zeroext i16 @external_i16_zeroext_func_void() nounwind
+declare signext i16 @external_i16_signext_func_void() nounwind
+
+declare i48 @external_i48_func_void() nounwind
+declare zeroext i48 @external_i48_zeroext_func_void() nounwind
+declare signext i48 @external_i48_signext_func_void() nounwind
+
+declare i32 @external_i32_func_void() nounwind
+declare i64 @external_i64_func_void() nounwind
+declare half @external_f16_func_void() nounwind
+declare float @external_f32_func_void() nounwind
+declare double @external_f64_func_void() nounwind
+
+declare ptr addrspace(1) @external_p1_func_void() nounwind
+declare <2 x ptr addrspace(1)> @external_v2p1_func_void() nounwind
+
+declare ptr addrspace(3) @external_p3_func_void() nounwind
+declare <2 x ptr addrspace(3)> @external_v2p3_func_void() nounwind
+
+declare <2 x half> @external_v2f16_func_void() nounwind
+declare <3 x half> @external_v3f16_func_void() nounwind
+declare <4 x half> @external_v4f16_func_void() nounwind
+declare <3 x float> @external_v3f32_func_void() nounwind
+declare <5 x float> @external_v5f32_func_void() nounwind
+declare <2 x double> @external_v2f64_func_void() nounwind
+
+declare <2 x i32> @external_v2i32_func_void() nounwind
+declare <3 x i32> @external_v3i32_func_void() nounwind
+declare <4 x i32> @external_v4i32_func_void() nounwind
+declare <5 x i32> @external_v5i32_func_void() nounwind
+declare <8 x i32> @external_v8i32_func_void() nounwind
+declare <16 x i32> @external_v16i32_func_void() nounwind
+declare <32 x i32> @external_v32i32_func_void() nounwind
+declare <33 x i32> @external_v33i32_func_void() nounwind
+declare <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1), i32) nounwind
+declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() nounwind
+declare { i32, <32 x i32> } @external_i32_v32i32_func_void() nounwind
+
+declare { i32, i64 } @external_i32_i64_func_void() nounwind
+
+declare [2 x i32] @external_a2i32_func_void() nounwind
+declare [5 x i8] @external_a5i8_func_void() nounwind
; return value and argument
-declare hidden i32 @external_i32_func_i32(i32) #0
+declare hidden i32 @external_i32_func_i32(i32) nounwind
; amdgpu_gfx calling convention
-declare i1 @external_gfx_i1_func_void() #0
-declare i8 @external_gfx_i8_func_void() #0
-declare i32 @external_gfx_i32_func_void() #0
-declare { i32, i64 } @external_gfx_i32_i64_func_void() #0
-declare hidden i32 @external_gfx_i32_func_i32(i32) #0
+declare i1 @external_gfx_i1_func_void() nounwind
+declare i8 @external_gfx_i8_func_void() nounwind
+declare i32 @external_gfx_i32_func_void() nounwind
+declare { i32, i64 } @external_gfx_i32_i64_func_void() nounwind
+declare hidden i32 @external_gfx_i32_func_i32(i32) nounwind
-define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: name: test_call_external_i32_func_i32_imm
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -127,7 +127,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -151,7 +151,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i1_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -209,7 +209,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_i1_func_void() nounwind {
; GCN-LABEL: name: test_gfx_call_external_i1_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -228,7 +228,7 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i1_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -289,7 +289,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_signext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i1_signext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -350,7 +350,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i8_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -409,7 +409,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_i8_func_void() nounwind {
; GCN-LABEL: name: test_gfx_call_external_i8_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -429,7 +429,7 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i8_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -490,7 +490,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_signext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i8_signext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -551,7 +551,7 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -609,7 +609,7 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i16_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -670,7 +670,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_signext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i16_signext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -731,7 +731,7 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -788,7 +788,7 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_i32_func_void() nounwind {
; GCN-LABEL: name: test_gfx_call_external_i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -806,7 +806,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i48_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i48_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -866,7 +866,7 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i48_zeroext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -928,7 +928,7 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i48_signext_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i48_signext_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -990,7 +990,7 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i64_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1049,7 +1049,7 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
+define amdgpu_kernel void @test_call_external_p1_func_void() nounwind {
; GCN-LABEL: name: test_call_external_p1_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1108,7 +1108,7 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2p1_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2p1_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1171,7 +1171,7 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
+define amdgpu_kernel void @test_call_external_p3_func_void() nounwind {
; GCN-LABEL: name: test_call_external_p3_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1228,7 +1228,7 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2p3_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2p3_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1287,7 +1287,7 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_f16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1345,7 +1345,7 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_f32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1402,7 +1402,7 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f64_func_void() nounwind {
; GCN-LABEL: name: test_call_external_f64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1461,7 +1461,7 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2f64_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2f64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1524,7 +1524,7 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1583,7 +1583,7 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v3i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1643,7 +1643,7 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v4i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1704,7 +1704,7 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v5i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v5i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1766,7 +1766,7 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v8i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v8i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1831,7 +1831,7 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v16i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v16i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1904,7 +1904,7 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v32i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1993,7 +1993,7 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2i16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2i16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2050,7 +2050,7 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3i16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v3i16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2111,7 +2111,7 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4i16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v4i16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2170,7 +2170,7 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2f16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v2f16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2227,7 +2227,7 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3f16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v3f16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2288,7 +2288,7 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4f16_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v4f16_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2347,7 +2347,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3f32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v3f32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2407,7 +2407,7 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v5f32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v5f32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2470,7 +2470,7 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
}
-define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i32_i64_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i32_i64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2534,7 +2534,7 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() nounwind {
; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -2559,7 +2559,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_a2i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_a2i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2621,7 +2621,7 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
+define amdgpu_kernel void @test_call_external_a5i8_func_void() nounwind {
; GCN-LABEL: name: test_call_external_a5i8_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2705,7 +2705,7 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v32i32_i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2771,7 +2771,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_i32_v32i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2837,7 +2837,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v33i32_func_void() nounwind {
; GCN-LABEL: name: test_call_external_v33i32_func_void
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2896,7 +2896,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 {
+define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) nounwind {
; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2964,9 +2964,5 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 75670604baa1ac..aaf492c46ed51e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1,118 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
-declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_void() nounwind
-declare hidden void @external_void_func_empty_struct({}, i32) #0
-declare hidden void @external_void_func_empty_array([0 x i8], i32) #0
+declare hidden void @external_void_func_empty_struct({}, i32) nounwind
+declare hidden void @external_void_func_empty_array([0 x i8], i32) nounwind
-declare hidden void @external_void_func_i1(i1) #0
-declare hidden void @external_void_func_i1_signext(i1 signext) #0
-declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
+declare hidden void @external_void_func_i1(i1) nounwind
+declare hidden void @external_void_func_i1_signext(i1 signext) nounwind
+declare hidden void @external_void_func_i1_zeroext(i1 zeroext) nounwind
-declare hidden void @external_void_func_i8(i8) #0
-declare hidden void @external_void_func_i8_signext(i8 signext) #0
-declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
+declare hidden void @external_void_func_i8(i8) nounwind
+declare hidden void @external_void_func_i8_signext(i8 signext) nounwind
+declare hidden void @external_void_func_i8_zeroext(i8 zeroext) nounwind
-declare hidden void @external_void_func_i16(i16) #0
-declare hidden void @external_void_func_i16_signext(i16 signext) #0
-declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
+declare hidden void @external_void_func_i16(i16) nounwind
+declare hidden void @external_void_func_i16_signext(i16 signext) nounwind
+declare hidden void @external_void_func_i16_zeroext(i16 zeroext) nounwind
-declare hidden void @external_void_func_i32(i32) #0
-declare hidden void @external_void_func_i64(i64) #0
-declare hidden void @external_void_func_v2i64(<2 x i64>) #0
-declare hidden void @external_void_func_v3i64(<3 x i64>) #0
-declare hidden void @external_void_func_v4i64(<4 x i64>) #0
+declare hidden void @external_void_func_i32(i32) nounwind
+declare hidden void @external_void_func_i64(i64) nounwind
+declare hidden void @external_void_func_v2i64(<2 x i64>) nounwind
+declare hidden void @external_void_func_v3i64(<3 x i64>) nounwind
+declare hidden void @external_void_func_v4i64(<4 x i64>) nounwind
-declare hidden void @external_void_func_i48(i48) #0
-declare hidden void @external_void_func_i48_signext(i48 signext) #0
-declare hidden void @external_void_func_i48_zeroext(i48 zeroext) #0
+declare hidden void @external_void_func_i48(i48) nounwind
+declare hidden void @external_void_func_i48_signext(i48 signext) nounwind
+declare hidden void @external_void_func_i48_zeroext(i48 zeroext) nounwind
-declare hidden void @external_void_func_p0(ptr) #0
-declare hidden void @external_void_func_v2p0(<2 x ptr>) #0
+declare hidden void @external_void_func_p0(ptr) nounwind
+declare hidden void @external_void_func_v2p0(<2 x ptr>) nounwind
-declare hidden void @external_void_func_f16(half) #0
-declare hidden void @external_void_func_f32(float) #0
-declare hidden void @external_void_func_f64(double) #0
-declare hidden void @external_void_func_v2f32(<2 x float>) #0
-declare hidden void @external_void_func_v2f64(<2 x double>) #0
-declare hidden void @external_void_func_v3f32(<3 x float>) #0
-declare hidden void @external_void_func_v3f64(<3 x double>) #0
-declare hidden void @external_void_func_v5f32(<5 x float>) #0
+declare hidden void @external_void_func_f16(half) nounwind
+declare hidden void @external_void_func_f32(float) nounwind
+declare hidden void @external_void_func_f64(double) nounwind
+declare hidden void @external_void_func_v2f32(<2 x float>) nounwind
+declare hidden void @external_void_func_v2f64(<2 x double>) nounwind
+declare hidden void @external_void_func_v3f32(<3 x float>) nounwind
+declare hidden void @external_void_func_v3f64(<3 x double>) nounwind
+declare hidden void @external_void_func_v5f32(<5 x float>) nounwind
-declare hidden void @external_void_func_v2i16(<2 x i16>) #0
-declare hidden void @external_void_func_v2f16(<2 x half>) #0
-declare hidden void @external_void_func_v3i16(<3 x i16>) #0
-declare hidden void @external_void_func_v3f16(<3 x half>) #0
-declare hidden void @external_void_func_v4i16(<4 x i16>) #0
-declare hidden void @external_void_func_v4f16(<4 x half>) #0
-declare hidden void @external_void_func_v5i16(<5 x i16>) #0
-declare hidden void @external_void_func_v7i16(<7 x i16>) #0
-declare hidden void @external_void_func_v63i16(<63 x i16>) #0
-declare hidden void @external_void_func_v65i16(<65 x i16>) #0
-declare hidden void @external_void_func_v66i16(<66 x i16>) #0
+declare hidden void @external_void_func_v2i16(<2 x i16>) nounwind
+declare hidden void @external_void_func_v2f16(<2 x half>) nounwind
+declare hidden void @external_void_func_v3i16(<3 x i16>) nounwind
+declare hidden void @external_void_func_v3f16(<3 x half>) nounwind
+declare hidden void @external_void_func_v4i16(<4 x i16>) nounwind
+declare hidden void @external_void_func_v4f16(<4 x half>) nounwind
+declare hidden void @external_void_func_v5i16(<5 x i16>) nounwind
+declare hidden void @external_void_func_v7i16(<7 x i16>) nounwind
+declare hidden void @external_void_func_v63i16(<63 x i16>) nounwind
+declare hidden void @external_void_func_v65i16(<65 x i16>) nounwind
+declare hidden void @external_void_func_v66i16(<66 x i16>) nounwind
-declare hidden void @external_void_func_v2i32(<2 x i32>) #0
-declare hidden void @external_void_func_v3i32(<3 x i32>) #0
-declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
-declare hidden void @external_void_func_v4i32(<4 x i32>) #0
-declare hidden void @external_void_func_v5i32(<5 x i32>) #0
-declare hidden void @external_void_func_v8i32(<8 x i32>) #0
-declare hidden void @external_void_func_v16i32(<16 x i32>) #0
-declare hidden void @external_void_func_v32i32(<32 x i32>) #0
-declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
-declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, ptr addrspace(3), ptr addrspace(5)) #0
-declare hidden void @external_void_func_v32i32_i8_i8_i16(<32 x i32>, i8, i8, i16) #0
+declare hidden void @external_void_func_v2i32(<2 x i32>) nounwind
+declare hidden void @external_void_func_v3i32(<3 x i32>) nounwind
+declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) nounwind
+declare hidden void @external_void_func_v4i32(<4 x i32>) nounwind
+declare hidden void @external_void_func_v5i32(<5 x i32>) nounwind
+declare hidden void @external_void_func_v8i32(<8 x i32>) nounwind
+declare hidden void @external_void_func_v16i32(<16 x i32>) nounwind
+declare hidden void @external_void_func_v32i32(<32 x i32>) nounwind
+declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) nounwind
+declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, ptr addrspace(3), ptr addrspace(5)) nounwind
+declare hidden void @external_void_func_v32i32_i8_i8_i16(<32 x i32>, i8, i8, i16) nounwind
; Structs
-declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
-declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) nounwind
+declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) nounwind
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) nounwind
-declare hidden void @external_void_func_v2i8(<2 x i8>) #0
-declare hidden void @external_void_func_v3i8(<3 x i8>) #0
-declare hidden void @external_void_func_v4i8(<4 x i8>) #0
-declare hidden void @external_void_func_v8i8(<8 x i8>) #0
-declare hidden void @external_void_func_v16i8(<16 x i8>) #0
+declare hidden void @external_void_func_v2i8(<2 x i8>) nounwind
+declare hidden void @external_void_func_v3i8(<3 x i8>) nounwind
+declare hidden void @external_void_func_v4i8(<4 x i8>) nounwind
+declare hidden void @external_void_func_v8i8(<8 x i8>) nounwind
+declare hidden void @external_void_func_v16i8(<16 x i8>) nounwind
-declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
-declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
+declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) nounwind
+declare hidden void @stack_passed_f64_arg(<32 x i32>, double) nounwind
declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
- <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
+ <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) nounwind
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
- <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
+ <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) nounwind
declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
- <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
+ <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) nounwind
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
- <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
+ <5 x float>, <5 x float>, <5 x float>, <5 x float>) nounwind
-declare hidden void @external_void_func_i16_inreg(i32 inreg) #0
-declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
-declare hidden void @external_void_func_i64_inreg(i64 inreg) #0
-declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
-declare hidden void @external_void_func_f16_inreg(half inreg) #0
-declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
-declare hidden void @external_void_func_f32_inreg(float inreg) #0
-declare hidden void @external_void_func_f64_inreg(double inreg) #0
-declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
-declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
-declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
+declare hidden void @external_void_func_i16_inreg(i32 inreg) nounwind
+declare hidden void @external_void_func_i32_inreg(i32 inreg) nounwind
+declare hidden void @external_void_func_i64_inreg(i64 inreg) nounwind
+declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) nounwind
+declare hidden void @external_void_func_f16_inreg(half inreg) nounwind
+declare hidden void @external_void_func_bf16_inreg(bfloat inreg) nounwind
+declare hidden void @external_void_func_f32_inreg(float inreg) nounwind
+declare hidden void @external_void_func_f64_inreg(double inreg) nounwind
+declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) nounwind
+declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) nounwind
+declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) nounwind
-declare hidden void @external_void_func_p0_inreg(ptr inreg) #0
-declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) #0
-declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) #0
-declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) #0
-declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) #0
+declare hidden void @external_void_func_p0_inreg(ptr inreg) nounwind
+declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) nounwind
+declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) nounwind
+declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) nounwind
+declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) nounwind
; amdgpu_gfx calling convention
-declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
+declare hidden amdgpu_gfx void @external_gfx_void_func_void() nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) nounwind
+declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) nounwind
-define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+define amdgpu_kernel void @test_call_external_void_func_void() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -165,7 +165,7 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_void() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
@@ -179,7 +179,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
ret void
}
-define void @test_func_call_external_void_func_void() #0 {
+define void @test_func_call_external_void_func_void() nounwind {
; CHECK-LABEL: name: test_func_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -222,7 +222,7 @@ define void @test_func_call_external_void_func_void() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
+define amdgpu_kernel void @test_call_external_void_func_empty_struct() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_empty_struct
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -277,7 +277,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
+define amdgpu_kernel void @test_call_external_void_func_empty_array() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_empty_array
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -332,7 +332,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i1_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -388,7 +388,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i1_signext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -447,7 +447,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -506,7 +506,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i8_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -564,7 +564,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i8_signext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -624,7 +624,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -684,7 +684,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i16_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -740,7 +740,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i16_signext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -799,7 +799,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -858,7 +858,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i32_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -914,7 +914,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $vgpr0
@@ -933,7 +933,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr4
@@ -952,7 +952,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i64_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i64_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1009,7 +1009,7 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i64() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1070,7 +1070,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1131,7 +1131,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i48(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i48
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1192,7 +1192,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i48_signext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1253,7 +1253,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1314,7 +1314,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
+define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_p0_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1372,7 +1372,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2p0() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2p0
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1433,7 +1433,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i64() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1502,7 +1502,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i64() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1572,7 +1572,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f16_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f16_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1628,7 +1628,7 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1683,7 +1683,7 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1742,7 +1742,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1803,7 +1803,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1868,7 +1868,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f64_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f64_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1925,7 +1925,7 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -1986,7 +1986,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2050,7 +2050,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2107,7 +2107,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2169,7 +2169,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2231,7 +2231,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2290,7 +2290,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2351,7 +2351,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v5i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v5i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2414,7 +2414,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v7i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v7i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2478,7 +2478,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v63i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v63i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2573,7 +2573,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v65i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v65i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2671,7 +2671,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v66i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v66i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2766,7 +2766,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2823,7 +2823,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2882,7 +2882,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -2941,7 +2941,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3003,7 +3003,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3067,7 +3067,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3128,7 +3128,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3191,7 +3191,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3256,7 +3256,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v8i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v8i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3323,7 +3323,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3394,7 +3394,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v16i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v16i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3469,7 +3469,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v32i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3563,7 +3563,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32
; CHECK: bb.1 (%ir-block.1):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3664,7 +3664,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3775,7 +3775,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3880,7 +3880,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -3945,7 +3945,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -3971,7 +3971,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
ret void
}
-define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
+define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() nounwind {
; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
@@ -3997,7 +3997,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4067,9 +4067,9 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
ret void
}
-declare void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %arg0, ptr addrspace(5) byval(i8) align 32 %arg1, i32 %arg2) #0
+declare void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %arg0, ptr addrspace(5) byval(i8) align 32 %arg1, i32 %arg2) nounwind
-define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr addrspace(5) align 32 %incoming1) #0 {
+define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr addrspace(5) align 32 %incoming1) nounwind {
; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -4125,11 +4125,11 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
ret void
}
-declare void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %arg0) #0
+declare void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %arg0) nounwind
; Make sure we are aware of the higher alignment of the incoming value
; than implied by the outgoing byval alignment in the memory operand.
-define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align 256 %incoming_high_align) #0 {
+define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align 256 %incoming_high_align) nounwind {
; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -4178,7 +4178,7 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i8() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4243,7 +4243,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i8() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4311,7 +4311,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i8() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4382,7 +4382,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v8i8() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v8i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4465,7 +4465,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v16i8() nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v16i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4572,7 +4572,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
ret void
}
-define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) nounwind {
; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
@@ -4674,7 +4674,7 @@ entry:
ret void
}
-define void @stack_12xv3i32() #0 {
+define void @stack_12xv3i32() nounwind {
; CHECK-LABEL: name: stack_12xv3i32
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -4817,7 +4817,7 @@ entry:
ret void
}
-define void @stack_12xv3f32() #0 {
+define void @stack_12xv3f32() nounwind {
; CHECK-LABEL: name: stack_12xv3f32
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -4960,7 +4960,7 @@ entry:
ret void
}
-define void @stack_8xv5i32() #0 {
+define void @stack_8xv5i32() nounwind {
; CHECK-LABEL: name: stack_8xv5i32
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5103,7 +5103,7 @@ entry:
ret void
}
-define void @stack_8xv5f32() #0 {
+define void @stack_8xv5f32() nounwind {
; CHECK-LABEL: name: stack_8xv5f32
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5281,7 +5281,7 @@ main_body:
ret void
}
-define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
+define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5328,7 +5328,7 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5373,7 +5373,7 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
+define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_i64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5422,7 +5422,7 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
+define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5471,7 +5471,7 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
+define void @test_call_external_void_func_f16_inreg(half inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5518,7 +5518,7 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
+define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_bf16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5565,7 +5565,7 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
+define void @test_call_external_void_func_f32_inreg(float inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5610,7 +5610,7 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
+define void @test_call_external_void_func_f64_inreg(double inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_f64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5659,7 +5659,7 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5704,7 +5704,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
ret void
}
-define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v3f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5758,7 +5758,7 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
ret void
}
-define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v4f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5807,7 +5807,7 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
ret void
}
-define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
+define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_p0_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5856,7 +5856,7 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 {
+define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_p1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5905,7 +5905,7 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
ret void
}
-define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) #0 {
+define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_p3_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -5950,7 +5950,7 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
ret void
}
-define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 {
+define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2p1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -6005,7 +6005,7 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
ret void
}
-define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 {
+define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) nounwind {
; CHECK-LABEL: name: test_call_external_void_func_v2p5_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -6054,9 +6054,5 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
index 3a31ab4ab9d0ad..9cc4b37bda3fac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
-define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -15,7 +15,7 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -29,7 +29,7 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -43,7 +43,7 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -57,7 +57,7 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -71,7 +71,7 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
ret float %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -91,7 +91,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -111,7 +111,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) strictfp {
; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -131,7 +131,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2
ret <2 x float> %val
}
-define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -145,7 +145,7 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -159,7 +159,7 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -173,7 +173,7 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 {
+define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) strictfp {
; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -187,7 +187,7 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0
ret float %val
}
-define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) strictfp {
; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -202,7 +202,7 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo
ret float %val
}
-define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 {
+define float @v_constained_sqrt_f32_fpexcept_strict(float %x) strictfp {
; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -215,15 +215,12 @@ define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 {
ret float %val
}
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) inaccessiblememonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
index 0b21c2112f05b8..8fd207f4d09295 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: pre-VI should have same ABI without legal i16 operations.
-define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
+define void @void_func_empty_arg({} %arg0, i32 %arg1) nounwind {
; CHECK-LABEL: name: void_func_empty_arg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -18,7 +18,7 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 {
ret void
}
-define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
+define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) nounwind {
; CHECK-LABEL: name: void_func_empty_array
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -31,7 +31,7 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 {
ret void
}
-define void @void_func_i1(i1 %arg0) #0 {
+define void @void_func_i1(i1 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i1
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -45,7 +45,7 @@ define void @void_func_i1(i1 %arg0) #0 {
ret void
}
-define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
+define void @void_func_i1_zeroext(i1 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i1_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -65,7 +65,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i1_signext(i1 signext %arg0) #0 {
+define void @void_func_i1_signext(i1 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i1_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -85,7 +85,7 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
ret void
}
-define void @i1_arg_i1_use(i1 %arg) #0 {
+define void @i1_arg_i1_use(i1 %arg) nounwind {
; CHECK-LABEL: name: i1_arg_i1_use
; CHECK: bb.1.bb:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -121,7 +121,7 @@ bb2:
ret void
}
-define void @void_func_i8(i8 %arg0) #0 {
+define void @void_func_i8(i8 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -136,7 +136,7 @@ define void @void_func_i8(i8 %arg0) #0 {
ret void
}
-define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
+define void @void_func_i8_zeroext(i8 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i8_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -156,7 +156,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i8_signext(i8 signext %arg0) #0 {
+define void @void_func_i8_signext(i8 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i8_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -176,7 +176,7 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
ret void
}
-define void @void_func_i16(i16 %arg0) #0 {
+define void @void_func_i16(i16 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -190,7 +190,7 @@ define void @void_func_i16(i16 %arg0) #0 {
ret void
}
-define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
+define void @void_func_i16_zeroext(i16 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i16_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -210,7 +210,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i16_signext(i16 signext %arg0) #0 {
+define void @void_func_i16_signext(i16 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i16_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -230,7 +230,7 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {
ret void
}
-define void @void_func_i24(i24 %arg0) #0 {
+define void @void_func_i24(i24 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i24
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -244,7 +244,7 @@ define void @void_func_i24(i24 %arg0) #0 {
ret void
}
-define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
+define void @void_func_i24_zeroext(i24 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i24_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -259,7 +259,7 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i24_signext(i24 signext %arg0) #0 {
+define void @void_func_i24_signext(i24 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i24_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -274,7 +274,7 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 {
ret void
}
-define void @void_func_i32(i32 %arg0) #0 {
+define void @void_func_i32(i32 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -288,7 +288,7 @@ define void @void_func_i32(i32 %arg0) #0 {
}
; The signext is an no-op
-define void @void_func_i32_signext(i32 signext %arg0) #0 {
+define void @void_func_i32_signext(i32 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i32_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -302,7 +302,7 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 {
}
; The zeroext is an no-op
-define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
+define void @void_func_i32_zeroext(i32 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i32_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -315,7 +315,7 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
ret void
}
-define void @void_func_p3i8(ptr addrspace(3) %arg0) #0 {
+define void @void_func_p3i8(ptr addrspace(3) %arg0) nounwind {
; CHECK-LABEL: name: void_func_p3i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -328,7 +328,7 @@ define void @void_func_p3i8(ptr addrspace(3) %arg0) #0 {
ret void
}
-define void @void_func_i48(i48 %arg0) #0 {
+define void @void_func_i48(i48 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i48
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -344,7 +344,7 @@ define void @void_func_i48(i48 %arg0) #0 {
ret void
}
-define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 {
+define void @void_func_i48_zeroext(i48 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i48_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -365,7 +365,7 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i48_signext(i48 signext %arg0) #0 {
+define void @void_func_i48_signext(i48 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i48_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -386,7 +386,7 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 {
ret void
}
-define void @void_func_i64(i64 %arg0) #0 {
+define void @void_func_i64(i64 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -401,7 +401,7 @@ define void @void_func_i64(i64 %arg0) #0 {
ret void
}
-define void @void_func_i95(i95 %arg0) #0 {
+define void @void_func_i95(i95 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i95
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -418,7 +418,7 @@ define void @void_func_i95(i95 %arg0) #0 {
ret void
}
-define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
+define void @void_func_i95_zeroext(i95 zeroext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i95_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -440,7 +440,7 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i95_signext(i95 signext %arg0) #0 {
+define void @void_func_i95_signext(i95 signext %arg0) nounwind {
; CHECK-LABEL: name: void_func_i95_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -462,7 +462,7 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 {
ret void
}
-define void @void_func_i96(i96 %arg0) #0 {
+define void @void_func_i96(i96 %arg0) nounwind {
; CHECK-LABEL: name: void_func_i96
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -478,7 +478,7 @@ define void @void_func_i96(i96 %arg0) #0 {
ret void
}
-define void @void_func_p0i8(ptr %arg0) #0 {
+define void @void_func_p0i8(ptr %arg0) nounwind {
; CHECK-LABEL: name: void_func_p0i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -493,7 +493,7 @@ define void @void_func_p0i8(ptr %arg0) #0 {
ret void
}
-define void @void_func_p1i8(ptr addrspace(1) %arg0) #0 {
+define void @void_func_p1i8(ptr addrspace(1) %arg0) nounwind {
; CHECK-LABEL: name: void_func_p1i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -508,7 +508,7 @@ define void @void_func_p1i8(ptr addrspace(1) %arg0) #0 {
ret void
}
-define void @void_func_f16(half %arg0) #0 {
+define void @void_func_f16(half %arg0) nounwind {
; CHECK-LABEL: name: void_func_f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -522,7 +522,7 @@ define void @void_func_f16(half %arg0) #0 {
ret void
}
-define void @void_func_f32(float %arg0) #0 {
+define void @void_func_f32(float %arg0) nounwind {
; CHECK-LABEL: name: void_func_f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -535,7 +535,7 @@ define void @void_func_f32(float %arg0) #0 {
ret void
}
-define void @void_func_f64(double %arg0) #0 {
+define void @void_func_f64(double %arg0) nounwind {
; CHECK-LABEL: name: void_func_f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -550,7 +550,7 @@ define void @void_func_f64(double %arg0) #0 {
ret void
}
-define void @void_func_v2i32(<2 x i32> %arg0) #0 {
+define void @void_func_v2i32(<2 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -565,7 +565,7 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v2i24(<2 x i24> %arg0) #0 {
+define void @void_func_v2i24(<2 x i24> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i24
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -581,7 +581,7 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 {
ret void
}
-define void @void_func_v3i24(<3 x i24> %arg0) #0 {
+define void @void_func_v3i24(<3 x i24> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3i24
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -598,7 +598,7 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 {
ret void
}
-define void @void_func_v2i8(<2 x i8> %arg0) #0 {
+define void @void_func_v2i8(<2 x i8> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -616,7 +616,7 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v3i8(<3 x i8> %arg0) #0 {
+define void @void_func_v3i8(<3 x i8> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -636,7 +636,7 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v4i8(<4 x i8> %arg0) #0 {
+define void @void_func_v4i8(<4 x i8> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -658,7 +658,7 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v2p3i8(<2 x ptr addrspace(3)> %arg0) #0 {
+define void @void_func_v2p3i8(<2 x ptr addrspace(3)> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p3i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -673,7 +673,7 @@ define void @void_func_v2p3i8(<2 x ptr addrspace(3)> %arg0) #0 {
ret void
}
-define void @void_func_v3i32(<3 x i32> %arg0) #0 {
+define void @void_func_v3i32(<3 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -689,7 +689,7 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v4i32(<4 x i32> %arg0) #0 {
+define void @void_func_v4i32(<4 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -706,7 +706,7 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v5i32(<5 x i32> %arg0) #0 {
+define void @void_func_v5i32(<5 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v5i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
@@ -724,7 +724,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v8i32(<8 x i32> %arg0) #0 {
+define void @void_func_v8i32(<8 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -745,7 +745,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v16i32(<16 x i32> %arg0) #0 {
+define void @void_func_v16i32(<16 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -774,7 +774,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v32i32(<32 x i32> %arg0) #0 {
+define void @void_func_v32i32(<32 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v32i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -821,7 +821,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {
}
; 1 over register limit
-define void @void_func_v33i32(<33 x i32> %arg0) #0 {
+define void @void_func_v33i32(<33 x i32> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v33i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -869,7 +869,7 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v2i64(<2 x i64> %arg0) #0 {
+define void @void_func_v2i64(<2 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -888,7 +888,7 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v2p0i8(<2 x ptr> %arg0) #0 {
+define void @void_func_v2p0i8(<2 x ptr> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p0i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -907,7 +907,7 @@ define void @void_func_v2p0i8(<2 x ptr> %arg0) #0 {
ret void
}
-define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) #0 {
+define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p1i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -926,7 +926,7 @@ define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) #0 {
ret void
}
-define void @void_func_v3i64(<3 x i64> %arg0) #0 {
+define void @void_func_v3i64(<3 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -948,7 +948,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v4i64(<4 x i64> %arg0) #0 {
+define void @void_func_v4i64(<4 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -973,7 +973,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v5i64(<5 x i64> %arg0) #0 {
+define void @void_func_v5i64(<5 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v5i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
@@ -1001,7 +1001,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v8i64(<8 x i64> %arg0) #0 {
+define void @void_func_v8i64(<8 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -1038,7 +1038,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v16i64(<16 x i64> %arg0) #0 {
+define void @void_func_v16i64(<16 x i64> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1100,7 +1100,7 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v2i16(<2 x i16> %arg0) #0 {
+define void @void_func_v2i16(<2 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1113,7 +1113,7 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v3i16(<3 x i16> %arg0) #0 {
+define void @void_func_v3i16(<3 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1130,7 +1130,7 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v4i16(<4 x i16> %arg0) #0 {
+define void @void_func_v4i16(<4 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1145,7 +1145,7 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v5i16(<5 x i16> %arg0) #0 {
+define void @void_func_v5i16(<5 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v5i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -1163,7 +1163,7 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v8i16(<8 x i16> %arg0) #0 {
+define void @void_func_v8i16(<8 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1180,7 +1180,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v16i16(<16 x i16> %arg0) #0 {
+define void @void_func_v16i16(<16 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -1203,7 +1203,7 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
; <2 x i16> pieces that start spilling to the stack.
; FIXME: load of 2 would be sufficient for last piece
-define void @void_func_v65i16(<65 x i16> %arg0) #0 {
+define void @void_func_v65i16(<65 x i16> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v65i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1253,7 +1253,7 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v2f32(<2 x float> %arg0) #0 {
+define void @void_func_v2f32(<2 x float> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1268,7 +1268,7 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 {
ret void
}
-define void @void_func_v3f32(<3 x float> %arg0) #0 {
+define void @void_func_v3f32(<3 x float> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -1284,7 +1284,7 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 {
ret void
}
-define void @void_func_v4f32(<4 x float> %arg0) #0 {
+define void @void_func_v4f32(<4 x float> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1301,7 +1301,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 {
ret void
}
-define void @void_func_v8f32(<8 x float> %arg0) #0 {
+define void @void_func_v8f32(<8 x float> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -1322,7 +1322,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 {
ret void
}
-define void @void_func_v16f32(<16 x float> %arg0) #0 {
+define void @void_func_v16f32(<16 x float> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -1351,7 +1351,7 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 {
ret void
}
-define void @void_func_v2f64(<2 x double> %arg0) #0 {
+define void @void_func_v2f64(<2 x double> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1370,7 +1370,7 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 {
ret void
}
-define void @void_func_v3f64(<3 x double> %arg0) #0 {
+define void @void_func_v3f64(<3 x double> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -1392,7 +1392,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 {
ret void
}
-define void @void_func_v4f64(<4 x double> %arg0) #0 {
+define void @void_func_v4f64(<4 x double> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -1417,7 +1417,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {
ret void
}
-define void @void_func_v8f64(<8 x double> %arg0) #0 {
+define void @void_func_v8f64(<8 x double> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -1454,7 +1454,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {
ret void
}
-define void @void_func_v16f64(<16 x double> %arg0) #0 {
+define void @void_func_v16f64(<16 x double> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1516,7 +1516,7 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {
ret void
}
-define void @void_func_v2f16(<2 x half> %arg0) #0 {
+define void @void_func_v2f16(<2 x half> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1529,7 +1529,7 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 {
ret void
}
-define void @void_func_v3f16(<3 x half> %arg0) #0 {
+define void @void_func_v3f16(<3 x half> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v3f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1546,7 +1546,7 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
ret void
}
-define void @void_func_v4f16(<4 x half> %arg0) #0 {
+define void @void_func_v4f16(<4 x half> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v4f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1561,7 +1561,7 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 {
ret void
}
-define void @void_func_v8f16(<8 x half> %arg0) #0 {
+define void @void_func_v8f16(<8 x half> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v8f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1578,7 +1578,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 {
ret void
}
-define void @void_func_v16f16(<16 x half> %arg0) #0 {
+define void @void_func_v16f16(<16 x half> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
@@ -1600,7 +1600,7 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 {
}
; Make sure there is no alignment requirement for passed vgprs.
-define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
+define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) nounwind {
; CHECK-LABEL: name: void_func_i32_i64_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1621,7 +1621,7 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
ret void
}
-define void @void_func_struct_i32({ i32 } %arg0) #0 {
+define void @void_func_struct_i32({ i32 } %arg0) nounwind {
; CHECK-LABEL: name: void_func_struct_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1634,7 +1634,7 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 {
ret void
}
-define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
+define void @void_func_struct_i8_i32({ i8, i32 } %arg0) nounwind {
; CHECK-LABEL: name: void_func_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
@@ -1653,7 +1653,7 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
ret void
}
-define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) nounwind {
; CHECK-LABEL: name: void_func_byval_struct_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
@@ -1673,7 +1673,7 @@ define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %
ret void
}
-define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
+define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) nounwind {
; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0
@@ -1709,7 +1709,7 @@ define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }
ret void
}
-define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
+define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) nounwind {
; CHECK-LABEL: name: void_func_byval_i32_byval_i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -1729,7 +1729,7 @@ define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, pt
ret void
}
-define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 {
+define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) nounwind {
; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -1750,7 +1750,7 @@ define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) %
}
; Make sure the alignment is taken from the correct parameter.
-define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x i32]) align 128 %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 {
+define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x i32]) align 128 %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) nounwind {
; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
@@ -1783,7 +1783,7 @@ define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x
}
; byval argument after non-byval stack passed argument
-define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addrspace(5) byval(i8) align 8 %arg2) #0 {
+define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addrspace(5) byval(i8) align 8 %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1838,7 +1838,7 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addr
}
; byval argument before non-byval stack passed argument
-define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) byval(i8) %arg1, i32 %arg2) #0 {
+define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) byval(i8) %arg1, i32 %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1892,7 +1892,7 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) by
ret void
}
-define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
+define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_i32_i64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -1950,7 +1950,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0
}
; FIXME: Different ext load types on CI vs. VI
-define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 {
+define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) nounwind {
; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2014,7 +2014,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
ret void
}
-define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1, ptr addrspace(5) %arg2) #0 {
+define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1, ptr addrspace(5) %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2068,7 +2068,7 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1
ret void
}
-define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
+define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2128,7 +2128,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2
ret void
}
-define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {
+define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2182,7 +2182,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2
ret void
}
-define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
+define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2254,7 +2254,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2
ret void
}
-define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
+define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2322,7 +2322,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4
ret void
}
-define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
+define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2406,7 +2406,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
ret void
}
-define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
+define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2523,7 +2523,7 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
}
; Make sure v3 isn't a wasted register because of v3 types being promoted to v4
-define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
+define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) nounwind {
; CHECK-LABEL: name: void_func_v3f32_wasted_reg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -2555,7 +2555,7 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
ret void
}
-define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
+define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) nounwind {
; CHECK-LABEL: name: void_func_v3i32_wasted_reg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -2588,7 +2588,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
}
; Check there is no crash.
-define void @void_func_v16i8(<16 x i8> %arg0) #0 {
+define void @void_func_v16i8(<16 x i8> %arg0) nounwind {
; CHECK-LABEL: name: void_func_v16i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
@@ -2635,7 +2635,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 {
}
; Check there is no crash.
-define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
+define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) nounwind {
; CHECK-LABEL: name: void_func_v32i32_v16i8
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -2777,7 +2777,7 @@ define void @vector_ptr_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspa
ret void
}
-define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
+define void @void_func_i1_inreg(i1 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2791,7 +2791,7 @@ define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
ret void
}
-define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
+define void @void_func_i8_inreg(i8 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i8_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2806,7 +2806,7 @@ define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
ret void
}
-define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
+define void @void_func_i16_inreg(i16 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2820,7 +2820,7 @@ define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
ret void
}
-define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
+define void @void_func_i32_inreg(i32 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2833,7 +2833,7 @@ define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
ret void
}
-define void @void_func_i48_inreg(i48 inreg %arg0) #0 {
+define void @void_func_i48_inreg(i48 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i48_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -2849,7 +2849,7 @@ define void @void_func_i48_inreg(i48 inreg %arg0) #0 {
ret void
}
-define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
+define void @void_func_i64_inreg(i64 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -2864,7 +2864,7 @@ define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
ret void
}
-define void @void_func_i96_inreg(i96 inreg %arg0) #0 {
+define void @void_func_i96_inreg(i96 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i96_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18
@@ -2880,7 +2880,7 @@ define void @void_func_i96_inreg(i96 inreg %arg0) #0 {
ret void
}
-define void @void_func_i128_inreg(i128 inreg %arg0) #0 {
+define void @void_func_i128_inreg(i128 inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_i128_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19
@@ -2897,7 +2897,7 @@ define void @void_func_i128_inreg(i128 inreg %arg0) #0 {
ret void
}
-define void @void_func_f16_inreg(half inreg %arg0) #0 {
+define void @void_func_f16_inreg(half inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2911,7 +2911,7 @@ define void @void_func_f16_inreg(half inreg %arg0) #0 {
ret void
}
-define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
+define void @void_func_bf16_inreg(bfloat inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_bf16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2925,7 +2925,7 @@ define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
ret void
}
-define void @void_func_f32_inreg(float inreg %arg0) #0 {
+define void @void_func_f32_inreg(float inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_f32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -2938,7 +2938,7 @@ define void @void_func_f32_inreg(float inreg %arg0) #0 {
ret void
}
-define void @void_func_f64_inreg(double inreg %arg0) #0 {
+define void @void_func_f64_inreg(double inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_f64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -2953,7 +2953,7 @@ define void @void_func_f64_inreg(double inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i1_inreg(<2 x i1> inreg %arg0) #0 {
+define void @void_func_v2i1_inreg(<2 x i1> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -2972,7 +2972,7 @@ define void @void_func_v2i1_inreg(<2 x i1> inreg %arg0) #0 {
}
-define void @void_func_v2i8_inreg(<2 x i8> inreg %arg0) #0 {
+define void @void_func_v2i8_inreg(<2 x i8> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i8_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -2990,7 +2990,7 @@ define void @void_func_v2i8_inreg(<2 x i8> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
+define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -3003,7 +3003,7 @@ define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
+define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -3016,7 +3016,7 @@ define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
+define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2bf16_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -3030,7 +3030,7 @@ define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
+define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3045,7 +3045,7 @@ define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
+define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f32_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3060,7 +3060,7 @@ define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
+define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2i64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19
@@ -3079,7 +3079,7 @@ define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
+define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2f64_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19
@@ -3099,17 +3099,17 @@ define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
}
; FIXME: Broken, see issue #78121
-; define void @void_func_v2i128_inreg(<2 x i128> inreg %arg0) #0 {
+; define void @void_func_v2i128_inreg(<2 x i128> inreg %arg0) nounwind {
; store <2 x i128> %arg0, ptr addrspace(1) undef
; ret void
; }
-; define void @void_func_v2f128_inreg(<2 x fp128> inreg %arg0) #0 {
+; define void @void_func_v2f128_inreg(<2 x fp128> inreg %arg0) nounwind {
; store <2 x fp128> %arg0, ptr addrspace(1) undef
; ret void
; }
-define void @void_func_p0_inreg(ptr inreg %arg0) #0 {
+define void @void_func_p0_inreg(ptr inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_p0_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3124,7 +3124,7 @@ define void @void_func_p0_inreg(ptr inreg %arg0) #0 {
ret void
}
-define void @void_func_p1_inreg(ptr addrspace(1) inreg %arg0) #0 {
+define void @void_func_p1_inreg(ptr addrspace(1) inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_p1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3139,7 +3139,7 @@ define void @void_func_p1_inreg(ptr addrspace(1) inreg %arg0) #0 {
ret void
}
-define void @void_func_p3_inreg(ptr addrspace(3) inreg %arg0) #0 {
+define void @void_func_p3_inreg(ptr addrspace(3) inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_p3_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -3152,7 +3152,7 @@ define void @void_func_p3_inreg(ptr addrspace(3) inreg %arg0) #0 {
ret void
}
-define void @void_func_p5_inreg(ptr addrspace(5) inreg %arg0) #0 {
+define void @void_func_p5_inreg(ptr addrspace(5) inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_p5_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16
@@ -3165,7 +3165,7 @@ define void @void_func_p5_inreg(ptr addrspace(5) inreg %arg0) #0 {
ret void
}
-define void @void_func_p999_inreg(ptr addrspace(999) inreg %arg0) #0 {
+define void @void_func_p999_inreg(ptr addrspace(999) inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_p999_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3180,7 +3180,7 @@ define void @void_func_p999_inreg(ptr addrspace(999) inreg %arg0) #0 {
ret void
}
-define void @void_func_v2p0_inreg(<2 x ptr> inreg %arg0) #0 {
+define void @void_func_v2p0_inreg(<2 x ptr> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p0_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19
@@ -3199,7 +3199,7 @@ define void @void_func_v2p0_inreg(<2 x ptr> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg0) #0 {
+define void @void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p1_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17, $sgpr18, $sgpr19
@@ -3218,7 +3218,7 @@ define void @void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2p3_inreg(<2 x ptr addrspace(3)> inreg %arg0) #0 {
+define void @void_func_v2p3_inreg(<2 x ptr addrspace(3)> inreg %arg0) nounwind {
; CHECK-LABEL: name: void_func_v2p3_inreg
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr16, $sgpr17
@@ -3233,7 +3233,5 @@ define void @void_func_v2p3_inreg(<2 x ptr addrspace(3)> inreg %arg0) #0 {
ret void
}
-attributes #0 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
index ec07b0b1d4f454..91db9cf4cf6d02 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
@@ -108,8 +108,6 @@ define void @test_memmove_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(
ret void
}
-declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memmove.p1.p4.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
-
-attributes #0 = { argmemonly nofree nounwind willreturn }
+declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) argmemonly nofree nounwind willreturn
+declare void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) argmemonly nofree nounwind willreturn
+declare void @llvm.memmove.p1.p4.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) argmemonly nofree nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
index b83b8a0a6d7d44..c294e357d419a5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
@@ -253,21 +253,18 @@ define void @test_memset_p3_i16(ptr addrspace(3) %dst, i8 %val) {
ret void
}
-declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i16(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #1
-declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture writeonly, i8, i32, i1 immarg) #1
-declare void @llvm.memset.p1.i16(ptr addrspace(1) nocapture writeonly, i8, i16, i1 immarg) #1
-declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg) #1
-declare void @llvm.memset.p3.i32(ptr addrspace(3) nocapture writeonly, i8, i32, i1 immarg) #1
-declare void @llvm.memset.p3.i16(ptr addrspace(3) nocapture writeonly, i8, i16, i1 immarg) #1
-
-attributes #0 = { argmemonly nounwind willreturn }
-attributes #1 = { argmemonly nounwind willreturn writeonly }
+declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i32, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i16, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i32, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i16, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i64, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memmove.p1.p3.i16(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i16, i1 immarg) argmemonly nounwind willreturn
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) argmemonly nounwind willreturn writeonly
+declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture writeonly, i8, i32, i1 immarg) argmemonly nounwind willreturn writeonly
+declare void @llvm.memset.p1.i16(ptr addrspace(1) nocapture writeonly, i8, i16, i1 immarg) argmemonly nounwind willreturn writeonly
+declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg) argmemonly nounwind willreturn writeonly
+declare void @llvm.memset.p3.i32(ptr addrspace(3) nocapture writeonly, i8, i32, i1 immarg) argmemonly nounwind willreturn writeonly
+declare void @llvm.memset.p3.i16(ptr addrspace(3) nocapture writeonly, i8, i16, i1 immarg) argmemonly nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
index 101bb6c0ed1235..e032679eab8419 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
@@ -17,8 +17,6 @@ define i32 @reloc_constant() {
ret i32 %res
}
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.reloc.constant(metadata) nounwind readnone speculatable willreturn
!0 = !{!"arst"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
index 2eab2a458e11e4..34b143836a9106 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
@@ -10,12 +10,9 @@
define amdgpu_kernel void @getreg_callsite_attributes() {
%reg0 = call i32 @llvm.amdgcn.s.getreg(i32 0)
store volatile i32 %reg0, ptr addrspace(1) undef
- %reg1 = call i32 @llvm.amdgcn.s.getreg(i32 0) #1
+ %reg1 = call i32 @llvm.amdgcn.s.getreg(i32 0) nounwind readnone
store volatile i32 %reg1, ptr addrspace(1) undef
ret void
}
-declare i32 @llvm.amdgcn.s.getreg(i32) #0
-
-attributes #0 = { nounwind readonly inaccessiblememonly }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.s.getreg(i32) nounwind readonly inaccessiblememonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 02bf7725015151..ad9b7adfd30bc7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; This is a copy of sibling-call.ll, but stops after the IRTranslator.
-define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
+define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) nounwind noinline {
; GCN-LABEL: name: i32_fastcc_i32_i32
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -16,7 +16,7 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
ret i32 %add0
}
-define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
+define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) nounwind noinline {
; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -38,7 +38,7 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
ret i32 %add0
}
-define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
+define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -57,7 +57,7 @@ entry:
ret i32 %ret
}
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -84,7 +84,7 @@ entry:
ret i32 %ret
}
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -111,7 +111,7 @@ entry:
ret i32 %ret
}
-define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -131,7 +131,7 @@ entry:
}
; It doesn't make sense to do a tail from a kernel
-define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
+define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr4_sgpr5
@@ -158,7 +158,7 @@ entry:
ret void
}
-define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) #1 {
+define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) nounwind noinline {
; GCN-LABEL: name: i32_fastcc_i32_byval_i32
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $vgpr0
@@ -176,7 +176,7 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) b
}
; Tail call disallowed with byval in parent.
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -208,7 +208,7 @@ entry:
; Tail call disallowed with byval in parent, not callee. The stack
; usage of incoming arguments must be <= the outgoing stack
; arguments.
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -263,7 +263,7 @@ entry:
ret i32 %ret
}
-define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
+define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) nounwind noinline {
; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32
; GCN: bb.1 (%ir-block.0):
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -318,7 +318,7 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
ret i32 %add2
}
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -406,7 +406,7 @@ entry:
ret i32 %ret
}
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -505,7 +505,7 @@ entry:
; If the callee requires more stack argument space than the caller,
; don't do a tail call.
; TODO: Do we really need this restriction?
-define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
+define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) nounwind noinline {
; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -569,7 +569,7 @@ entry:
}
; Have another non-tail in the function
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -601,7 +601,7 @@ entry:
; Have stack object in caller and stack passed arguments. SP should be
; in same place at function exit.
-define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -697,7 +697,7 @@ entry:
ret i32 %ret
}
-define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
+define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) nounwind noinline {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30
@@ -804,7 +804,7 @@ entry:
declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64]))
-define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
+define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) nounwind noinline {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -972,7 +972,7 @@ entry:
declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32)
; Callee has a byval and non-byval stack passed argument
-define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
+define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) nounwind noinline {
; GCN-LABEL: name: sibling_call_byval_and_stack_passed
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1164,7 +1164,7 @@ entry:
declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0)
-define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
+define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_i64_fastcc_i64
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1213,7 +1213,7 @@ entry:
declare hidden fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %arg0)
-define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) #1 {
+define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1262,7 +1262,7 @@ entry:
declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0)
-define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
+define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_i16_fastcc_i16
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1309,7 +1309,7 @@ entry:
declare hidden fastcc half @f16_fastcc_f16(half %arg0)
-define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
+define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_f16_fastcc_f16
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1356,7 +1356,7 @@ entry:
declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0)
-define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 {
+define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1410,7 +1410,7 @@ entry:
declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0)
-define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 {
+define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1459,7 +1459,7 @@ entry:
declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0)
-define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 {
+define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) nounwind noinline {
; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64
; GCN: bb.1.entry:
; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
@@ -1512,8 +1512,5 @@ entry:
ret <2 x i64> %ret
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
index 2f718814ef77b5..fc6580976da2d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
@@ -5,7 +5,7 @@
@lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
@lds_4_8 = addrspace(3) global i32 undef, align 8
-define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
; CHECK-LABEL: use_lds_globals:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -30,5 +30,3 @@ entry:
store i32 9, ptr addrspace(3) @lds_4_8
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
index cd536e2336cac2..3672fb5691772c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
@@ -12,7 +12,7 @@
; GCN: .amdgpu_lds lds.external, 0, 4
; GCN: .globl lds.defined
; GCN: .amdgpu_lds lds.defined, 32, 8
-define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) #0 {
+define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) "no-signed-zeros-fp-math"="true" {
main_body:
%gep0 = getelementptr [0 x i32], ptr addrspace(3) @lds.external, i32 0, i32 %arg1
%tmp = load i32, ptr addrspace(3) %gep0
@@ -23,6 +23,3 @@ main_body:
%r = bitcast i32 %tmp to float
ret float %r
}
-
-attributes #0 = { "no-signed-zeros-fp-math"="true" }
-attributes #4 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
index 36ba7c2ecfac30..f707f2840a741d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
@@ -1891,32 +1891,30 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
index 8e4e4cf2c5b87f..91be32ec283392 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
@@ -5361,53 +5361,48 @@ main_body:
ret <4 x float> %data
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind writeonly }
-attributes #3 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index f61f985cd24ab1..d91d15cbefe82c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -1605,13 +1605,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs
ret <4 x half> %tex
}
-declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
index adf7e6d38b989b..a732dfaea182a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
@@ -793,13 +793,11 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r
ret <4 x float> %tex
}
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
index 4d36e0f7970167..f4f7367dc65859 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
@@ -125,7 +125,5 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr ad
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
index 2c155b72c649f5..4cf16384859d7d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
@@ -103,7 +103,5 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32
ret float %tex
}
-declare float @llvm.amdgcn.image.load.3d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.image.load.3d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
index 5b017ad89a0ed3..d9e4ba456bb2d3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
@@ -7314,62 +7314,58 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
index cc2a8ba9c4d5d9..0c3d482e1c658e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
@@ -2420,28 +2420,24 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
index 12234088adca65..47617c7eaa5d91 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
@@ -480,9 +480,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
ret void
}
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind writeonly }
+declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
index d165fb577efc2c..6c2af80e249946 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
@@ -6,15 +6,13 @@
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() readnone
%value = load i32, ptr addrspace(4) %dispatch_ptr
store i32 %value, ptr addrspace(1) %out
ret void
}
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
-attributes #0 = { readnone }
+declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 4caf83774bbba2..314fcfbf58c6e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -1572,8 +1572,6 @@ exit:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0
-declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone speculatable
+declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
index 1a49a38158122e..eb410225e8634b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
@@ -1493,7 +1493,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) nounwind {
; GFX7-LABEL: test_div_scale_f32_val_undef_val:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1541,7 +1541,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) nounwind {
; GFX7-LABEL: test_div_scale_f32_undef_val_val:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) nounwind {
; GFX7-LABEL: test_div_scale_f32_undef_undef_val:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1635,7 +1635,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) nounwind {
; GFX7-LABEL: test_div_scale_f64_val_undef_val:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1685,10 +1685,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %ou
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
-declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
-declare float @llvm.fabs.f32(float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone speculatable
+declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone speculatable
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
index 03ed6837291c88..4fa6f4e682993b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
@@ -274,6 +274,4 @@ define float @ds_fadd_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
ret float %ret
}
-declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
index e4c4f42b137ef7..16a9030f7a50f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
@@ -366,6 +366,4 @@ define float @ds_fmax_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
ret float %ret
}
-declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
index 0f6fb5acd56ada..03359937282cf7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
@@ -274,6 +274,4 @@ define float @ds_fmin_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
ret float %ret
}
-declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
index 1d9514c58ab9c0..eded383ad195f0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
@@ -106,6 +106,4 @@ define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
ret float %ret
}
-declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index 36455f190510ec..6572b4cd2592c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -299,7 +299,7 @@ define float @v_add_mul_legacy_f32(float %a, float %b, float %c) {
ret float %add
}
-define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
+define float @v_mad_legacy_f32(float %a, float %b, float %c) "denormal-fp-math-f32"="preserve-sign" {
; GFX6-LABEL: v_mad_legacy_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,7 +343,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
ret float %add
}
-define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) #2 {
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) "denormal-fp-math-f32"="preserve-sign" {
; GFX6-LABEL: v_mad_legacy_fneg_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -586,9 +586,5 @@ define float @v_mul_legacy_f32_2.0_swap(float %b) {
ret float %result
}
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
-attributes #0 = { nounwind readnone speculatable willreturn }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { "denormal-fp-math-f32"="preserve-sign" }
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
index ade6e55b482bb7..27a1dd65a4d7fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
@@ -209,7 +209,4 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspa
ret void
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1
-
-attributes #0 = { nounwind willreturn }
-attributes #1 = { argmemonly nounwind }
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) argmemonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
index 752ddbb896c6b1..e9ac518113f0ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
@@ -120,7 +120,5 @@ define void @global_atomic_fadd_v2f16_off_neg2047(ptr addrspace(1) %ptr, <2 x ha
ret void
}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #0
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) argmemonly nounwind willreturn
+declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
index a3796197e4c534..5f67ac204098c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
@@ -2116,46 +2116,44 @@ main_body:
ret <2 x float> %out
}
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
index 221e2fd4f00f73..eb6e1f0e16241a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -3986,46 +3986,44 @@ main_body:
ret <2 x float> %out
}
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
index 916b9c0835d41c..237d3812bb3327 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -1012,19 +1012,17 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
index 841f4f1ac055ee..13053ea0310205 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
@@ -1214,20 +1214,18 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
index 7c04fe31591841..b8309abb08b014 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
@@ -539,17 +539,15 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
index d1a36d552e21b5..93d84c6cfd5b62 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
@@ -397,14 +397,11 @@ main_body:
ret <4 x float> %r
}
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
index 496f9f428580b6..c021bd2e06ec77 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
@@ -697,17 +697,14 @@ main_body:
ret <4 x float> %r
}
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
index 19b0057d69b69d..f1fa47d8797371 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -1143,14 +1143,12 @@ define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32
ret float %vv
}
-declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
-declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
index ecf81f633761d7..821c2efdc9bc16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
@@ -983,14 +983,12 @@ define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
ret float %vv
}
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
-declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
index fb4c92353cb99d..30fe4302129835 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
@@ -268,7 +268,5 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
index 2c8b8126aa09a4..6e1498fae537ba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
@@ -310,7 +310,5 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
index 11ad98ac5fd1a4..8c661401fd0cc0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
@@ -280,7 +280,5 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
index f5d11fcdff80a7..fa9a9ee7cab03a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
@@ -280,7 +280,5 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
index 162a58613065f0..60ce513ef48dde 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
@@ -272,7 +272,5 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
ret <4 x float> %v.vec
}
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
index a39d7ae1fe4f42..5dc038703a1478 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
@@ -107,15 +107,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
index a874b30aff2751..c06003f6d87b19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
@@ -352,19 +352,15 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
index d07eadcd641466..f4c0f59a04e32e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
@@ -141,9 +141,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
ret void
}
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind writeonly }
+declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
index 9e445d034edc95..68a70632caa86d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
@@ -735,7 +735,7 @@ define amdgpu_ps void @image_store_v4f32_dmask_0110(<8 x i32> inreg %rsrc, i32 %
ret void
}
-define amdgpu_ps void @image_store_f32_dmask_1111(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, float %in) #0 {
+define amdgpu_ps void @image_store_f32_dmask_1111(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, float %in) nounwind writeonly {
; GFX6-LABEL: image_store_f32_dmask_1111:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, s10
@@ -816,9 +816,7 @@ define amdgpu_ps void @image_store_f32_dmask_1111(<8 x i32> inreg %rsrc, i32 inr
ret void
}
-declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f32.i32(<2 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f32.i32(<3 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind writeonly }
+declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v2f32.i32(<2 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v3f32.i32(<3 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
index 7d693d8a1bb29e..f21f4da4dd647d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
@@ -4,13 +4,10 @@
; GCN-LABEL: {{^}}test_ps:
; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
-define amdgpu_ps i32 @test_ps() #1 {
+define amdgpu_ps i32 @test_ps() nounwind {
%implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
%value = load volatile i32, ptr addrspace(4) %implicit_buffer_ptr
ret i32 %value
}
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
index 623360f6b1d9c5..f87bf70e70732f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -27,11 +27,11 @@ main_body:
%p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
%p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
%p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32_many:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -69,11 +69,11 @@ main_body:
%p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
%p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
%p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32_many_vm:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
@@ -115,11 +115,11 @@ main_body:
%p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
%p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
%p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f16:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -147,7 +147,7 @@ main_body:
ret half %res
}
-define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
+define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) nounwind {
; GCN-LABEL: v_interp_f16_imm_params:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
@@ -167,13 +167,10 @@ main_body:
ret half %res
}
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
-declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) nounwind
+declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) nounwind
+declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) nounwind
+declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
index 92a0dd5748742c..699811ceecade3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
@@ -3,7 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-32BANK %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-16BANK %s
-define amdgpu_ps float @interp_f16(float %i, i32 inreg %m0) #0 {
+define amdgpu_ps float @interp_f16(float %i, i32 inreg %m0) nounwind readnone speculatable {
; GFX9-32BANK-LABEL: interp_f16:
; GFX9-32BANK: ; %bb.0:
; GFX9-32BANK-NEXT: s_mov_b32 m0, s0
@@ -29,7 +29,7 @@ define amdgpu_ps float @interp_f16(float %i, i32 inreg %m0) #0 {
ret float %res
}
-define amdgpu_ps float @interp_f16_high(float %i, i32 inreg %m0) #0 {
+define amdgpu_ps float @interp_f16_high(float %i, i32 inreg %m0) nounwind readnone speculatable {
; GFX9-32BANK-LABEL: interp_f16_high:
; GFX9-32BANK: ; %bb.0:
; GFX9-32BANK-NEXT: s_mov_b32 m0, s0
@@ -55,7 +55,7 @@ define amdgpu_ps float @interp_f16_high(float %i, i32 inreg %m0) #0 {
ret float %res
}
-define amdgpu_ps float @interp_f16_0_0(float %i, i32 inreg %m0) #0 {
+define amdgpu_ps float @interp_f16_0_0(float %i, i32 inreg %m0) nounwind readnone speculatable {
; GFX9-32BANK-LABEL: interp_f16_0_0:
; GFX9-32BANK: ; %bb.0:
; GFX9-32BANK-NEXT: s_mov_b32 m0, s0
@@ -82,7 +82,7 @@ define amdgpu_ps float @interp_f16_0_0(float %i, i32 inreg %m0) #0 {
}
; Copy needed to legalize %i
-define amdgpu_ps float @interp_f16_sgpr_i(float inreg %i,i32 inreg %m0) #0 {
+define amdgpu_ps float @interp_f16_sgpr_i(float inreg %i,i32 inreg %m0) nounwind readnone speculatable {
; GFX9-32BANK-LABEL: interp_f16_sgpr_i:
; GFX9-32BANK: ; %bb.0:
; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
@@ -111,6 +111,4 @@ define amdgpu_ps float @interp_f16_sgpr_i(float inreg %i,i32 inreg %m0) #0 {
ret float %res
}
-declare float @llvm.amdgcn.interp.p1.f16(float, i32 immarg, i32 immarg, i1 immarg, i32) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare float @llvm.amdgcn.interp.p1.f16(float, i32 immarg, i32 immarg, i1 immarg, i32) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 303dc46e2c8845..c520a5440f324f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -145,10 +145,8 @@ bb1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i1 @llvm.amdgcn.is.private(ptr nocapture) nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 63702d2587574b..f929cd74a91e16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -145,10 +145,8 @@ bb1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i1 @llvm.amdgcn.is.shared(ptr nocapture) nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
index 7fc9842824b01d..5094fbb3db45c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -10,7 +10,7 @@
; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test(ptr addrspace(1) %out) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15
; HSA: .amdhsa_kernarg_size 8
-define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
+define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
store i32 %val, ptr addrspace(1) %out
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 64
-define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
+define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) nounwind "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
store i32 %val, ptr addrspace(1) %out
@@ -81,7 +81,7 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out,
; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
-define amdgpu_kernel void @test_no_kernargs() #1 {
+define amdgpu_kernel void @test_no_kernargs() nounwind "amdgpu-implicitarg-num-bytes"="0" {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -93,7 +93,7 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
-define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() nounwind "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
store volatile i32 %val, ptr addrspace(1) null
@@ -104,7 +104,7 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
-define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() nounwind "amdgpu-implicitarg-num-bytes"="38" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
store volatile i32 %val, ptr addrspace(1) null
@@ -119,13 +119,8 @@ define ptr addrspace(4) @func_kernarg_segment_ptr() {
ret ptr addrspace(4) %ptr
}
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
-attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index e7faabb72ab691..84aa222929f419 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -10,7 +10,7 @@ declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x doubl
declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
-define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f32_32x32x4bf16_1k:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[34:35], s[0:1], 0x24
@@ -78,7 +78,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f32_16x16x4bf16_1k:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24
@@ -124,7 +124,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f32_4x4x4bf16_1k:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -154,7 +154,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f32_32x32x8bf16_1k:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24
@@ -201,7 +201,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f32_16x16x16bf16_1k:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -232,7 +232,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f64_4x4x4f64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -255,7 +255,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f64_16x16x4f64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24
@@ -289,7 +289,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_imm:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -314,7 +314,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f64_16x16x4f64_imm:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24
@@ -349,7 +349,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_lit:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24
@@ -384,5 +384,3 @@ bb:
store <4 x double> %mai.1, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
index c0cd0686072002..6a975ab48a7b2f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
@@ -43,7 +43,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) {
; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) convergent nounwind readnone
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -85,12 +85,10 @@ define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) {
; GFX11-NEXT: s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
- %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) #0
+ %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) convergent nounwind readnone
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #0
-declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #0
-
-attributes #0 = { convergent nounwind readnone }
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
+declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
index 1eb0c2a8774258..0ee30624ee71c7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
@@ -6,15 +6,13 @@
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
- %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone
%value = load i32, ptr addrspace(4) %queue_ptr
store i32 %value, ptr addrspace(1) %out
ret void
}
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-
-attributes #0 = { nounwind readnone }
+declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
index 570a39d0fa5fb4..73351532bc4e18 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -459,7 +459,5 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
ret float %cast
}
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index c96fc017ae936a..f717dc18f6344d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -487,7 +487,5 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
ret void
}
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
index 36d5e914d40bed..5453824536899c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -430,9 +430,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
ret <4 x half> %val
}
-declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
index 23efaa4d2bd91e..25b4b4df22f747 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -325,9 +325,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
ret <4 x float> %val
}
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
index 93d68443c78431..2bdb6eda02224f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
@@ -251,7 +251,5 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr
ret float %cast
}
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
index 999f42ff905ab7..4ec225827b3965 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
@@ -487,7 +487,5 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr
ret void
}
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
index 5b19b1c913a94b..6823e8a8475324 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
@@ -312,9 +312,7 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v
ret <4 x half> %val
}
-declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
index 2dc688db86e4fb..f6ce52d98d4e6b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
@@ -176,9 +176,7 @@ define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_
ret <4 x float> %val
}
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
index a799e203d6439d..a34deffc9395e7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
@@ -387,9 +387,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
ret half %val
}
-declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
index 3e135472ebbb18..c593cd867b0a38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
@@ -224,9 +224,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
ret float %val
}
-declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
index 1cfb15391be367..8af556b6302567 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
@@ -547,9 +547,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
ret half %val
}
-declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
index 4f8b20d10c8749..73bab3e2131bae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
@@ -411,9 +411,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
ret float %val
}
-declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index 3def36766fbe0b..9e09411c1e4d35 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -3,7 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define float @v_rsq_clamp_f32(float %src) #0 {
+define float @v_rsq_clamp_f32(float %src) nounwind {
; SI-LABEL: v_rsq_clamp_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@ define float @v_rsq_clamp_f32(float %src) #0 {
ret float %rsq_clamp
}
-define float @v_rsq_clamp_fabs_f32(float %src) #0 {
+define float @v_rsq_clamp_fabs_f32(float %src) nounwind {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@ define float @v_rsq_clamp_fabs_f32(float %src) #0 {
ret float %rsq_clamp
}
-define double @v_rsq_clamp_f64(double %src) #0 {
+define double @v_rsq_clamp_f64(double %src) nounwind {
; SI-LABEL: v_rsq_clamp_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -104,7 +104,7 @@ define double @v_rsq_clamp_f64(double %src) #0 {
ret double %rsq_clamp
}
-define double @v_rsq_clamp_fabs_f64(double %src) #0 {
+define double @v_rsq_clamp_fabs_f64(double %src) nounwind {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -143,7 +143,7 @@ define double @v_rsq_clamp_fabs_f64(double %src) #0 {
ret double %rsq_clamp
}
-define float @v_rsq_clamp_undef_f32() #0 {
+define float @v_rsq_clamp_undef_f32() nounwind {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,7 +174,7 @@ define float @v_rsq_clamp_undef_f32() #0 {
ret float %rsq_clamp
}
-define double @v_rsq_clamp_undef_f64() #0 {
+define double @v_rsq_clamp_undef_f64() nounwind {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -212,7 +212,7 @@ define double @v_rsq_clamp_undef_f64() #0 {
ret double %rsq_clamp
}
-define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
+define float @v_rsq_clamp_f32_non_ieee(float %src) nounwind "amdgpu-ieee"="false" {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -243,7 +243,7 @@ define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
ret float %rsq_clamp
}
-define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
+define double @v_rsq_clamp_f64_non_ieee(double %src) nounwind "amdgpu-ieee"="false" {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,11 +281,7 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
ret double %rsq_clamp
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-ieee"="false" }
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rsq.clamp.f32(float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.amdgcn.rsq.clamp.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index af50f56a872269..f8e0248282b536 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -1476,6 +1476,4 @@ define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
ret void
}
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
index a370408a9efcb6..2185b74dd614dd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
@@ -1,7 +1,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.amdgcn.s.sleep(i32) #0
+declare void @llvm.amdgcn.s.sleep(i32) nounwind
; GCN-LABEL: {{^}}test_s_sleep:
; GCN: s_sleep 0{{$}}
@@ -20,7 +20,7 @@ declare void @llvm.amdgcn.s.sleep(i32) #0
; GCN: s_sleep 13{{$}}
; GCN: s_sleep 14{{$}}
; GCN: s_sleep 15{{$}}
-define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
+define amdgpu_kernel void @test_s_sleep(i32 %x) nounwind {
call void @llvm.amdgcn.s.sleep(i32 0)
call void @llvm.amdgcn.s.sleep(i32 1)
call void @llvm.amdgcn.s.sleep(i32 2)
@@ -41,5 +41,3 @@ define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
call void @llvm.amdgcn.s.sleep(i32 15)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index 6eed92ba1d71cc..47a7bf62ec63a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
-define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
+define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) nounwind {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11,7 +11,7 @@ define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
ret i32 %bfe_i32
}
-define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
+define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) nounwind {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s1, s1, 63
@@ -24,12 +24,12 @@ define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i3
}
; TODO: Need to expand this
-; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
+; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) nounwind {
; %bfe_i64 = call i32 @llvm.amdgcn.sbfe.i64(i32 %src0, i32 %src1, i32 %src2)
; ret i64 %bfe_i64
; }
-define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
+define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) nounwind {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s2, s2, 63
@@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3
ret i64 %bfe_i32
}
-define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_i32_arg_arg_imm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -59,7 +59,7 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) nounwind {
; GFX6-LABEL: bfe_i32_arg_imm_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -77,7 +77,7 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
; GFX6-LABEL: bfe_i32_imm_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -96,7 +96,7 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
ret void
}
-define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 {
+define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) nounwind {
; GFX6-LABEL: v_bfe_print_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -115,7 +115,7 @@ define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -132,7 +132,7 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -149,7 +149,7 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_6:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -170,7 +170,7 @@ define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -191,7 +191,7 @@ define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -212,7 +212,7 @@ define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_9:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -231,7 +231,7 @@ define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_10:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -250,7 +250,7 @@ define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_11:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -269,7 +269,7 @@ define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_12:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -288,7 +288,7 @@ define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_13:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -308,7 +308,7 @@ define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}
-define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_i32_test_14:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -328,7 +328,7 @@ define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -344,7 +344,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -360,7 +360,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -376,7 +376,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_3:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -392,7 +392,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_4:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -408,7 +408,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -425,7 +425,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_6:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -442,7 +442,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -459,7 +459,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -476,7 +476,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_9:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -493,7 +493,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_10:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -510,7 +510,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_11:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -527,7 +527,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_12:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -544,7 +544,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_13:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -561,7 +561,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_14:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -578,7 +578,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_15:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -595,7 +595,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -611,7 +611,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_17:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -628,7 +628,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_i32_constant_fold_test_18:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -645,7 +645,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_sext_in_reg_i24:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -667,7 +667,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
@@ -708,7 +708,7 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: bfe_0_width:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -727,7 +727,7 @@ define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: bfe_8_bfe_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -748,7 +748,7 @@ define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: bfe_8_bfe_16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -770,7 +770,7 @@ define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1)
}
; This really should be folded into 1
-define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: bfe_16_bfe_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -792,7 +792,7 @@ define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1)
}
; Make sure there isn't a redundant BFE
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -813,7 +813,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32
ret void
}
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -834,7 +834,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: sextload_i8_to_i32_bfe:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -858,7 +858,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -882,7 +882,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -904,7 +904,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -926,7 +926,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -948,8 +948,5 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, pt
ret void
}
-declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
-declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
index e2dab03e410aaa..0b64bf7a50e2ed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
@@ -354,6 +354,4 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
index 06560afee3c9a7..647e117aba31fb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
@@ -129,6 +129,4 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
index 0d729351f65a70..2709080b9480a9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
@@ -83,6 +83,4 @@ define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.sdot8(i32, i32, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.sdot8(i32, i32, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index cbee039df7fd0b..0d4589f9de0350 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
- %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) convergent readnone
store i32 %tmp, ptr addrspace(1) %out
ret void
}
@@ -35,7 +35,7 @@ define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT: s_endpgm
- %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
+ %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) convergent readnone
store i64 %tmp, ptr addrspace(1) %out
ret void
}
@@ -77,7 +77,7 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
; GCN-NEXT: s_endpgm
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
%cmp = icmp eq i32 %val, 56
- %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) convergent readnone
br i1 %cmp, label %.zero, label %.one
.zero:
@@ -93,8 +93,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
ret void
}
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) convergent readnone
+declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) convergent readnone
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
-
-attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
index 200d38a0d56994..7ca4866692da23 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
@@ -77,10 +77,8 @@ define amdgpu_ps <3 x float> @softwqm_v3f32(<3 x float> %val) {
ret <3 x float> %ret
}
-declare i1 @llvm.amdgcn.softwqm.i1(i1) #0
-declare float @llvm.amdgcn.softwqm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.softwqm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.softwqm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.softwqm.f64(double) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i1 @llvm.amdgcn.softwqm.i1(i1) nounwind readnone speculatable
+declare float @llvm.amdgcn.softwqm.f32(float) nounwind readnone speculatable
+declare <2 x half> @llvm.amdgcn.softwqm.v2f16(<2 x half>) nounwind readnone speculatable
+declare <3 x float> @llvm.amdgcn.softwqm.v3f32(<3 x float>) nounwind readnone speculatable
+declare double @llvm.amdgcn.softwqm.f64(double) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
index ab720ce8f942c3..7332f61909c5a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
@@ -453,7 +453,5 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
ret float %cast
}
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 8589fe9fd056de..c10cf93a62cd9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -518,7 +518,5 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
ret void
}
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
index 870588014cd29c..b232efe49ef779 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
@@ -518,10 +518,8 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
ret half %fval
}
-declare half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
index 686b849ff58f95..ae34831bad57dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
@@ -580,13 +580,11 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc,
ret void
}
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
index 94ce8aac8a4c68..91d1e8e4c0f513 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -834,15 +834,13 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
ret float %val
}
-declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
index 2e7323068d108f..d75b7b6ac24872 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
@@ -245,7 +245,5 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__v
ret float %cast
}
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
+declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
index 54657982493f78..5d1dcf6e8c0990 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
@@ -518,7 +518,5 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_
ret void
}
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
index 6c0319ef570d69..0c9b47d15860d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
@@ -372,10 +372,8 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex
ret half %fval
}
-declare half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
index 9edc245549113c..41fe2ad7651ad5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
@@ -322,13 +322,11 @@ define amdgpu_cs void @struct_ptr_buffer_load_format_i32_tfe(ptr addrspace(8) in
ret void
}
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
index 66c62e9ce8a9ca..11bc3c565fb029 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
@@ -419,15 +419,13 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr
ret float %val
}
-declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
index 1a57c2e77bddff..dc2e52321d4759 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
@@ -374,9 +374,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
ret half %val
}
-declare half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
index 63143ed718054d..1c4e16014fb02f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
@@ -211,9 +211,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
ret float %val
}
-declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
index f270f87aae66d6..a0a9393d183f71 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -521,9 +521,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
ret half %val
}
-declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
index 7d3ecd363befbe..6709964bbdccdf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -391,9 +391,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
ret float %val
}
-declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
index 1d5cc1e1ec0463..c74c9b24ed58c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
@@ -171,6 +171,4 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
ret void
}
-declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index 43a0f018dc1cd2..d8c05894f5adef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
-define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
+define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) nounwind {
; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11,7 +11,7 @@ define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
ret i32 %bfe_i32
}
-define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
+define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) nounwind {
; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s1, s1, 63
@@ -24,12 +24,12 @@ define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i3
}
; TODO: Need to expand this.
-; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
+; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) nounwind {
; %bfe_i64 = call i32 @llvm.amdgcn.ubfe.i64(i32 %src0, i32 %src1, i32 %src2)
; ret i64 %bfe_i64
; }
-define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
+define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) nounwind {
; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s2, s2, 63
@@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3
ret i64 %bfe_i32
}
-define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
; GFX6-LABEL: bfe_u32_arg_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -60,7 +60,7 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_u32_arg_arg_imm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -78,7 +78,7 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) nounwind {
; GFX6-LABEL: bfe_u32_arg_imm_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -96,7 +96,7 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
; GFX6-LABEL: bfe_u32_imm_arg_arg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -115,7 +115,7 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -132,7 +132,7 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -149,7 +149,7 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zextload_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -171,7 +171,7 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrsp
}
; FIXME: Should be using s_add_i32
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -194,7 +194,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -217,7 +217,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -240,7 +240,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -263,7 +263,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -286,7 +286,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -309,7 +309,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -328,7 +328,7 @@ define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -349,7 +349,7 @@ define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_3:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -370,7 +370,7 @@ define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_4:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -392,7 +392,7 @@ define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -414,7 +414,7 @@ define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_6:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -435,7 +435,7 @@ define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -456,7 +456,7 @@ define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -477,7 +477,7 @@ define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_9:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -496,7 +496,7 @@ define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_10:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -515,7 +515,7 @@ define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_11:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -534,7 +534,7 @@ define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_12:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -554,7 +554,7 @@ define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(
}
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_13:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -574,7 +574,7 @@ define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: bfe_u32_test_14:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -594,7 +594,7 @@ define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -610,7 +610,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -626,7 +626,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -642,7 +642,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_3:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -658,7 +658,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_4:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -674,7 +674,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -691,7 +691,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_6:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -708,7 +708,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -725,7 +725,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -742,7 +742,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_9:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -759,7 +759,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_10:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -776,7 +776,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_11:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -793,7 +793,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_12:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -810,7 +810,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_13:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -827,7 +827,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_14:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -844,7 +844,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_15:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -861,7 +861,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -877,7 +877,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_17:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -894,7 +894,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) nounwind {
; GFX6-LABEL: bfe_u32_constant_fold_test_18:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
@@ -935,7 +935,7 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
ptr addrspace(1) %out1,
- ptr addrspace(1) %in) #0 {
+ ptr addrspace(1) %in) nounwind {
%src = load i32, ptr addrspace(1) %in, align 4
%and = and i32 %src, 63
%bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
@@ -944,7 +944,7 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0
ret void
}
-define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) nounwind {
; GFX6-LABEL: lshr_and:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -962,7 +962,7 @@ define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GFX6-LABEL: v_lshr_and:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -980,7 +980,7 @@ define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0
ret void
}
-define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) nounwind {
; GFX6-LABEL: and_lshr:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -998,7 +998,7 @@ define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) nounwind {
; GFX6-LABEL: and_lshr2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -1016,7 +1016,7 @@ define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) nounwind {
; GFX6-LABEL: shl_lshr:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2
@@ -1034,8 +1034,5 @@ define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
-declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
+declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
index 3acff52874dd9b..15663e12a3e77e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
@@ -337,6 +337,4 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
index b14af9e043e097..4008808f48c292 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
@@ -148,6 +148,4 @@ define i32 @v_udot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
index a664c8aa508efb..45ee0fe55e60b3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
@@ -84,6 +84,4 @@ define i32 @v_udot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
ret i32 %r
}
-declare i32 @llvm.amdgcn.udot8(i32, i32, i32, i1 immarg) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.udot8(i32, i32, i32, i1 immarg) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
index f7adfe47b64f25..88a5362b410740 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
@@ -93,14 +93,11 @@ define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i6
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #1
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) convergent nounwind readnone
store i64 %tmp0, ptr addrspace(1) %gep
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { convergent nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
+declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
index df201c1903b642..e0d011fc685458 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
@@ -3,9 +3,9 @@
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone
; ALL-LABEL: {{^}}test_workgroup_id_x:
@@ -32,7 +32,7 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -59,7 +59,7 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -94,14 +94,11 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 2e62d13f1e69a7..ca789f55fc0269 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -8,9 +8,9 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone
; MESA: .section .AMDGPU.config
; MESA: .long 47180
@@ -23,7 +23,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -59,7 +59,7 @@ define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -70,7 +70,7 @@ define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 {
; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
; ALL-NOT: v0
; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
-define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) nounwind {
%id0 = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %id0, ptr addrspace(1) %out
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 {
; ALL: {{flat|global}}_store_{{dword|b32}}
; ALL-NOT: v0
; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
-define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) #1 {
+define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) nounwind {
bb0:
store volatile i32 0, ptr addrspace(1) %out
%cond = icmp eq i32 %arg, 0
@@ -103,7 +103,7 @@ bb2:
; ALL: s_waitcnt
; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31
; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v31
-define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 {
+define void @test_workitem_id_x_func(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -112,7 +112,7 @@ define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 {
; ALL-LABEL: {{^}}test_workitem_id_y_func:
; HSA: v_bfe_u32 v2, v31, 10, 10
; MESA: v_bfe_u32 v2, v31, 10, 10
-define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 {
+define void @test_workitem_id_y_func(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -121,7 +121,7 @@ define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 {
; ALL-LABEL: {{^}}test_workitem_id_z_func:
; HSA: v_bfe_u32 v2, v31, 20, 10
; MESA: v_bfe_u32 v2, v31, 20, 10
-define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 {
+define void @test_workitem_id_z_func(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -193,9 +193,6 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!0 = !{i32 64, i32 1, i32 1}
!1 = !{i32 1, i32 64, i32 1}
!2 = !{i32 1, i32 1, i32 64}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
index a36b25ccfa48e4..a03ce020f31992 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
@@ -69,7 +69,7 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
%c1 = fcmp oge float %arg1, 0.0
call void @llvm.amdgcn.wqm.demote(i1 false)
%tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -150,7 +150,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
%c1 = fcmp oge float %arg1, 0.0
call void @llvm.amdgcn.wqm.demote(i1 %c1)
%tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -276,7 +276,7 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
.continue:
%tmp1 = select i1 %c2, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -405,11 +405,11 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
br label %.continue
.continue:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -531,7 +531,7 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB4_5:
.entry:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%z.cmp = fcmp olt float %tex0, 0.0
@@ -543,7 +543,7 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
.continue:
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -645,13 +645,13 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB5_3:
.entry:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%z.cmp = fcmp olt float %tex0, 0.0
call void @llvm.amdgcn.wqm.demote(i1 %z.cmp)
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -847,8 +847,8 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
.entry:
%p0 = extractelement <2 x float> %input, i32 0
%p1 = extractelement <2 x float> %input, i32 1
- %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) #2
- %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) #2
+ %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
+ %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
%argi = fptosi float %arg to i32
%cond0 = icmp eq i32 %argi, 0
br i1 %cond0, label %.continue0, label %.demote0
@@ -875,7 +875,7 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
br label %.continue1
.continue1:
- call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) #3
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) inaccessiblememonly nounwind
ret void
}
@@ -1141,8 +1141,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
.entry:
%p0 = extractelement <2 x float> %input, i32 0
%p1 = extractelement <2 x float> %input, i32 1
- %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) #2
- %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) #2
+ %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
+ %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
%argi = fptosi float %arg to i32
%cond0 = icmp eq i32 %argi, 0
br i1 %cond0, label %.continue0, label %.demote0
@@ -1175,22 +1175,16 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
br i1 %loop.cond, label %.continue0, label %.return
.return:
- call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) #3
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) inaccessiblememonly nounwind
ret void
}
-declare void @llvm.amdgcn.wqm.demote(i1) #0
-declare i1 @llvm.amdgcn.live.mask() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #4
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { inaccessiblememonly nounwind }
-attributes #4 = { convergent nounwind readnone }
+declare void @llvm.amdgcn.wqm.demote(i1) nounwind
+declare i1 @llvm.amdgcn.live.mask() nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) nounwind readnone speculatable
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) inaccessiblememonly nounwind
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
index edc93f4effb825..625d6c3ad837c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
@@ -77,10 +77,8 @@ define amdgpu_ps <3 x float> @wqm_v3f32(<3 x float> %val) {
ret <3 x float> %ret
}
-declare i1 @llvm.amdgcn.wqm.i1(i1) #0
-declare float @llvm.amdgcn.wqm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.wqm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.wqm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.wqm.f64(double) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i1 @llvm.amdgcn.wqm.i1(i1) nounwind readnone speculatable
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone speculatable
+declare <2 x half> @llvm.amdgcn.wqm.v2f16(<2 x half>) nounwind readnone speculatable
+declare <3 x float> @llvm.amdgcn.wqm.v3f32(<3 x float>) nounwind readnone speculatable
+declare double @llvm.amdgcn.wqm.f64(double) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
index 17f3dd7398deaf..600da6e9732467 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
@@ -4,7 +4,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_s_s_s:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v0, s4
@@ -29,7 +29,7 @@ define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i
ret float %writelane.cast
}
-define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) #0 {
+define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) nounwind {
; GFX7-LABEL: test_writelane_s_s_imm:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_mov_b32_e32 v0, 42
@@ -55,7 +55,7 @@ define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane)
}
; data is not inline imm
-define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_k_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_movk_i32 s0, 0x3e7
@@ -81,7 +81,7 @@ define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
}
; Data is inline imm
-define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_imm_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_writelane_b32 v0, 42, s2
@@ -102,7 +102,7 @@ define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0
}
; Data is subtarget dependent inline imm
-define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_imminv2pi_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 0x3e22f983
@@ -126,7 +126,7 @@ define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.
; Lane is inline imm
-define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_s_imm_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_writelane_b32 v0, s2, 23
@@ -147,7 +147,7 @@ define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0
}
; Lane index is larger than the wavesize
-define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_s_k0_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_writelane_b32 v0, s2, 3
@@ -169,7 +169,7 @@ define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0
}
; Lane index is larger than the wavesize for wave32
-define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_s_k1_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_writelane_b32 v0, s2, 32
@@ -189,7 +189,7 @@ define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0
ret float %writelane.cast
}
-define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_v_v_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_readfirstlane_b32 s1, v1
@@ -220,7 +220,7 @@ define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in)
ret float %writelane.cast
}
-define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_v_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
@@ -250,7 +250,7 @@ define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vd
; FIXME: This could theoretically use m0 directly as the data source,
; and another SGPR as the lane selector and avoid register swap.
-define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_m0_s_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: ;;#ASMSTART
@@ -284,7 +284,7 @@ define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0
ret float %writelane.cast
}
-define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0 {
+define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) nounwind {
; GFX7-LABEL: test_writelane_s_m0_v:
; GFX7: ; %bb.0:
; GFX7-NEXT: ;;#ASMSTART
@@ -314,9 +314,5 @@ define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0
ret float %writelane.cast
}
-declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind readnone willreturn }
-attributes #2 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.writelane(i32, i32, i32) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
index bf48683b0bec94..a932a195e33471 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
@@ -155,15 +155,13 @@ define amdgpu_ps <3 x float> @strict_wwm_v3f32(<3 x float> %val) {
ret <3 x float> %ret
}
-declare i1 @llvm.amdgcn.wwm.i1(i1) #0
-declare float @llvm.amdgcn.wwm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.wwm.f64(double) #0
-declare i1 @llvm.amdgcn.strict.wwm.i1(i1) #0
-declare float @llvm.amdgcn.strict.wwm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.strict.wwm.f64(double) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i1 @llvm.amdgcn.wwm.i1(i1) nounwind readnone speculatable
+declare float @llvm.amdgcn.wwm.f32(float) nounwind readnone speculatable
+declare <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half>) nounwind readnone speculatable
+declare <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float>) nounwind readnone speculatable
+declare double @llvm.amdgcn.wwm.f64(double) nounwind readnone speculatable
+declare i1 @llvm.amdgcn.strict.wwm.i1(i1) nounwind readnone speculatable
+declare float @llvm.amdgcn.strict.wwm.f32(float) nounwind readnone speculatable
+declare <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half>) nounwind readnone speculatable
+declare <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float>) nounwind readnone speculatable
+declare double @llvm.amdgcn.strict.wwm.f64(double) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index b169063d67872d..b6d357cd611cec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -554,8 +554,6 @@ define float @v_powi_neg128_f32(float %l) {
; ret double %res
; }
-declare half @llvm.powi.f16.i32(half, i32) #0
-declare float @llvm.powi.f32.i32(float, i32) #0
-declare double @llvm.powi.f64.i32(double, i32) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.powi.f16.i32(half, i32) nounwind readnone speculatable willreturn
+declare float @llvm.powi.f32.i32(float, i32) nounwind readnone speculatable willreturn
+declare double @llvm.powi.f64.i32(double, i32) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index b940dc74839b26..702c6eb9cb8187 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -262,9 +262,7 @@ bb.1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index 61263e0efa2ea1..58dd4c6da99a07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -358,6 +358,4 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
ret void
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index d6a7ae8d867fe8..8f522860e4394a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -534,6 +534,4 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
ret void
}
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
index 74af51f0c1676d..b6e857d76a2eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -5,26 +5,26 @@
--- |
define amdgpu_kernel void @load_global_v8i32_non_uniform(ptr addrspace(1) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%global.not.uniform.v8i32 = getelementptr <8 x i32>, ptr addrspace(1) %in, i32 %tmp0
%tmp2 = load <8 x i32>, ptr addrspace(1) %global.not.uniform.v8i32
ret void
}
define amdgpu_kernel void @load_global_v4i64_non_uniform(ptr addrspace(1) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%global.not.uniform.v4i64 = getelementptr <4 x i64>, ptr addrspace(1) %in, i32 %tmp0
%tmp2 = load <4 x i64>, ptr addrspace(1) %global.not.uniform.v4i64
ret void
}
define amdgpu_kernel void @load_global_v16i32_non_uniform(ptr addrspace(1) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%global.not.uniform.v16i32 = getelementptr <16 x i32>, ptr addrspace(1) %in, i32 %tmp0
%tmp2 = load <16 x i32>, ptr addrspace(1) %global.not.uniform.v16i32
ret void
}
define amdgpu_kernel void @load_global_v8i64_non_uniform(ptr addrspace(1) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%global.not.uniform.v8i64 = getelementptr <8 x i64>, ptr addrspace(1) %in, i32 %tmp0
%tmp2 = load <8 x i64>, ptr addrspace(1) %global.not.uniform.v8i64
ret void
@@ -34,40 +34,40 @@
define amdgpu_kernel void @load_global_v16i32_uniform() {ret void}
define amdgpu_kernel void @load_global_v8i64_uniform() {ret void}
define amdgpu_kernel void @load_constant_v8i32_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform.v8i32 = getelementptr <8 x i32>, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load <8 x i32>, ptr addrspace(4) %constant.not.uniform.v8i32
ret void
}
define amdgpu_kernel void @load_constant_i256_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform = getelementptr i256, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load i256, ptr addrspace(4) %constant.not.uniform
ret void
}
define amdgpu_kernel void @load_constant_v16i16_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform = getelementptr <16 x i16>, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load <16 x i16>, ptr addrspace(4) %constant.not.uniform
ret void
}
define amdgpu_kernel void @load_constant_v4i64_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform.v4i64 = getelementptr <4 x i64>, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load <4 x i64>, ptr addrspace(4) %constant.not.uniform.v4i64
ret void
}
define amdgpu_kernel void @load_constant_v16i32_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform.v16i32 = getelementptr <16 x i32>, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load <16 x i32>, ptr addrspace(4) %constant.not.uniform.v16i32
ret void
}
define amdgpu_kernel void @load_constant_v8i64_non_uniform(ptr addrspace(4) %in) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%constant.not.uniform.v8i64 = getelementptr <8 x i64>, ptr addrspace(4) %in, i32 %tmp0
%tmp2 = load <8 x i64>, ptr addrspace(4) %constant.not.uniform.v8i64
ret void
@@ -103,8 +103,7 @@
define amdgpu_kernel void @load_constant_i96_align8() { ret void }
define amdgpu_kernel void @load_constant_i96_align16() { ret void }
- declare i32 @llvm.amdgcn.workitem.id.x() #0
- attributes #0 = { nounwind readnone }
+ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index c50187f5949011..2e788f2d08e2b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -36,7 +36,7 @@
}
define amdgpu_kernel void @load_global_non_uniform(ptr addrspace(1) %ptr2) {
- %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = getelementptr i32, ptr addrspace(1) %ptr2, i32 %tmp0
%tmp2 = load i32, ptr addrspace(1) %tmp1
ret void
@@ -48,8 +48,7 @@
ret void
}
- declare i32 @llvm.amdgcn.workitem.id.x() #0
- attributes #0 = { nounwind readnone }
+ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
!0 = !{}
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index a6f9bb7ee055d4..76210f06361245 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -6313,34 +6313,32 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
ret <2 x i128> %result
}
-declare i7 @llvm.sadd.sat.i7(i7, i7) #0
-declare i8 @llvm.sadd.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) #0
+declare i7 @llvm.sadd.sat.i7(i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.sadd.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
-declare i16 @llvm.sadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.sadd.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.sadd.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) #0
+declare i16 @llvm.sadd.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.sadd.sat.v5i16(<5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.sadd.sat.v6i16(<6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
-declare i24 @llvm.sadd.sat.i24(i24, i24) #0
+declare i24 @llvm.sadd.sat.i24(i24, i24) nounwind readnone speculatable willreturn
-declare i32 @llvm.sadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.sadd.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
+declare i32 @llvm.sadd.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.sadd.sat.v5i32(<5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
-declare i48 @llvm.sadd.sat.i48(i48, i48) #0
+declare i48 @llvm.sadd.sat.i48(i48, i48) nounwind readnone speculatable willreturn
-declare i64 @llvm.sadd.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) #0
+declare i64 @llvm.sadd.sat.i64(i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
-declare i128 @llvm.sadd.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i128 @llvm.sadd.sat.i128(i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
index 7ad19a47970039..ef78eacb1a2200 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -721,6 +721,4 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
ret <2 x i32> %shl
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 320dfbb4980e4c..e4a747b4430d9a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -6381,34 +6381,32 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
ret <2 x i128> %result
}
-declare i7 @llvm.ssub.sat.i7(i7, i7) #0
-declare i8 @llvm.ssub.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) #0
+declare i7 @llvm.ssub.sat.i7(i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.ssub.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
-declare i16 @llvm.ssub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) #0
+declare i16 @llvm.ssub.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
-declare i24 @llvm.ssub.sat.i24(i24, i24) #0
+declare i24 @llvm.ssub.sat.i24(i24, i24) nounwind readnone speculatable willreturn
-declare i32 @llvm.ssub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
+declare i32 @llvm.ssub.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
-declare i48 @llvm.ssub.sat.i48(i48, i48) #0
+declare i48 @llvm.ssub.sat.i48(i48, i48) nounwind readnone speculatable willreturn
-declare i64 @llvm.ssub.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) #0
+declare i64 @llvm.ssub.sat.i64(i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
-declare i128 @llvm.ssub.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i128 @llvm.ssub.sat.i128(i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
index 15740ee5476e84..e0b2cfa6288c7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12,7 +12,7 @@ define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0
ret half %val
}
-define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
+define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha
ret <2 x half> %val
}
-define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 {
+define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
ret <3 x half> %val
}
-define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 {
+define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -88,7 +88,7 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
ret <4 x half> %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -99,7 +99,7 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z
ret half %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -111,19 +111,19 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, ha
ret half %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call half @llvm.fabs.f16(half %x) #0
- %neg.y = call half @llvm.fabs.f16(half %y) #0
+ %neg.x = call half @llvm.fabs.f16(half %x) strictfp
+ %neg.y = call half @llvm.fabs.f16(half %y) strictfp
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
-define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
+define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -154,5 +154,3 @@ declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata,
declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
index 5955c590e9d1cb..4404175b048c22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11,7 +11,7 @@ define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z)
ret float %val
}
-define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
+define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@ define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
+define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@ define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x
ret <3 x float> %val
}
-define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
+define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@ define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x
ret <4 x float> %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@ define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, floa
ret float %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -70,19 +70,19 @@ define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y,
ret float %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call float @llvm.fabs.f32(float %x) #0
- %neg.y = call float @llvm.fabs.f32(float %y) #0
+ %neg.x = call float @llvm.fabs.f32(float %x) strictfp
+ %neg.y = call float @llvm.fabs.f32(float %y) strictfp
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
+define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -100,5 +100,3 @@ declare float @llvm.experimental.constrained.fma.f32(float, float, float, metada
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
index 04a07c42c934c6..c5187069c4f25b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
-define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11,7 +11,7 @@ define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double
ret double %val
}
-define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@ define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2
ret <2 x double> %val
}
-define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) #0 {
+define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v3f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@ define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3
ret <3 x double> %val
}
-define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) #0 {
+define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v4f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@ define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4
ret <4 x double> %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@ define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, d
ret double %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -70,19 +70,19 @@ define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double
ret double %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call double @llvm.fabs.f64(double %x) #0
- %neg.y = call double @llvm.fabs.f64(double %y) #0
+ %neg.x = call double @llvm.fabs.f64(double %x) strictfp
+ %neg.y = call double @llvm.fabs.f64(double %y) strictfp
%val = call double @llvm.experimental.constrained.fma.f64(double %neg.x, double %neg.y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret double %val
}
-define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -100,5 +100,3 @@ declare double @llvm.experimental.constrained.fma.f64(double, double, double, me
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 1821d29d4b050b..438976758117a3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -3433,34 +3433,32 @@ define amdgpu_ps <2 x i128> @s_uaddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
ret <2 x i128> %result
}
-declare i7 @llvm.uadd.sat.i7(i7, i7) #0
-declare i8 @llvm.uadd.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.uadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.uadd.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.uadd.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.uadd.sat.i24(i24, i24) #0
-
-declare i32 @llvm.uadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.uadd.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.uadd.sat.i48(i48, i48) #0
-
-declare i64 @llvm.uadd.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.uadd.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i7 @llvm.uadd.sat.i7(i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.uadd.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
+
+declare i16 @llvm.uadd.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.uadd.sat.v5i16(<5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.uadd.sat.v6i16(<6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
+
+declare i24 @llvm.uadd.sat.i24(i24, i24) nounwind readnone speculatable willreturn
+
+declare i32 @llvm.uadd.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.uadd.sat.v5i32(<5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
+
+declare i48 @llvm.uadd.sat.i48(i48, i48) nounwind readnone speculatable willreturn
+
+declare i64 @llvm.uadd.sat.i64(i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
+
+declare i128 @llvm.uadd.sat.i128(i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index a60370cd460f9e..a5bd158a5765c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -3301,34 +3301,32 @@ define amdgpu_ps <2 x i128> @s_usubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
ret <2 x i128> %result
}
-declare i7 @llvm.usub.sat.i7(i7, i7) #0
-declare i8 @llvm.usub.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.usub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.usub.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.usub.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.usub.sat.i24(i24, i24) #0
-
-declare i32 @llvm.usub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.usub.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.usub.sat.i48(i48, i48) #0
-
-declare i64 @llvm.usub.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.usub.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i7 @llvm.usub.sat.i7(i7, i7) nounwind readnone speculatable willreturn
+declare i8 @llvm.usub.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) nounwind readnone speculatable willreturn
+declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) nounwind readnone speculatable willreturn
+
+declare i16 @llvm.usub.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare <5 x i16> @llvm.usub.sat.v5i16(<5 x i16>, <5 x i16>) nounwind readnone speculatable willreturn
+declare <6 x i16> @llvm.usub.sat.v6i16(<6 x i16>, <6 x i16>) nounwind readnone speculatable willreturn
+declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone speculatable willreturn
+
+declare i24 @llvm.usub.sat.i24(i24, i24) nounwind readnone speculatable willreturn
+
+declare i32 @llvm.usub.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <5 x i32> @llvm.usub.sat.v5i32(<5 x i32>, <5 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
+
+declare i48 @llvm.usub.sat.i48(i48, i48) nounwind readnone speculatable willreturn
+
+declare i64 @llvm.usub.sat.i64(i64, i64) nounwind readnone speculatable willreturn
+declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone speculatable willreturn
+
+declare i128 @llvm.usub.sat.i128(i128, i128) nounwind readnone speculatable willreturn
+declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
index 037210a496d6d6..dc7b0e7f5adb06 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
@@ -3,7 +3,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
-define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) nounwind {
; GFX8-LABEL: constant_load_i8_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -42,7 +42,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) nounwind {
; GFX8-LABEL: constant_load_i16_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -81,7 +81,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
ret void
}
-define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: sextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -124,7 +124,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: sextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -167,7 +167,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: zextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -210,7 +210,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: zextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -253,7 +253,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: constant_load_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -291,7 +291,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: constant_load_i16_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -329,7 +329,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: constant_sextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -376,7 +376,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX8-LABEL: constant_zextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -422,5 +422,3 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
store i32 %zextload, ptr addrspace(1) %out, align 2
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll b/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
index a17ad6bd67512c..9d60a79c8c1361 100644
--- a/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
+++ b/llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
@@ -4,11 +4,9 @@
; CHECK-NEXT: s_nop 0
; CHECK-NEXT: ;;#ASMEND
-define void @foo(ptr addrspace(5) %ptr) #0 {
+define void @foo(ptr addrspace(5) %ptr) "amdgpu-flat-work-group-size"="1,768" {
%tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", "=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2)
%tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0
store i32 %tmp2, ptr addrspace(5) %ptr, align 4
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,768" }
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index ae20ab1de3a2df..103e50cb913396 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -13,7 +13,7 @@ declare hidden void @requires_all_inputs()
; This function incorrectly is marked with the hints that the callee
; does not require the implicit arguments to the function. Make sure
; we do not crash.
-define void @parent_func_missing_inputs() #0 {
+define void @parent_func_missing_inputs() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: parent_func_missing_inputs:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -44,7 +44,7 @@ define void @parent_func_missing_inputs() #0 {
ret void
}
-define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
+define amdgpu_kernel void @parent_kernel_missing_inputs() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9
@@ -93,7 +93,7 @@ define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
}
; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
-define void @marked_func_use_workitem_id(ptr addrspace(1) %ptr) #0 {
+define void @marked_func_use_workitem_id(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -131,7 +131,7 @@ define void @marked_func_use_workitem_id(ptr addrspace(1) %ptr) #0 {
}
; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
-define amdgpu_kernel void @marked_kernel_use_workitem_id(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @marked_kernel_use_workitem_id(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -154,7 +154,7 @@ define amdgpu_kernel void @marked_kernel_use_workitem_id(ptr addrspace(1) %ptr)
ret void
}
-define void @marked_func_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
+define void @marked_func_use_workgroup_id(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_func_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -177,7 +177,7 @@ define void @marked_func_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
ret void
}
-define amdgpu_kernel void @marked_kernel_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @marked_kernel_use_workgroup_id(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -203,7 +203,7 @@ define amdgpu_kernel void @marked_kernel_use_workgroup_id(ptr addrspace(1) %ptr)
ret void
}
-define void @marked_func_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
+define void @marked_func_use_other_sgpr(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_func_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -235,7 +235,7 @@ define void @marked_func_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
ret void
}
-define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_add_u32 s0, s4, 8
@@ -259,7 +259,7 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #
ret void
}
-define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
+define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0
@@ -272,7 +272,7 @@ define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
}
; On gfx8, the queue ptr is required for this addrspacecast.
-define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) #0 {
+define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
; FIXEDABI-SDAG: ; %bb.0:
; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -327,7 +327,7 @@ define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr
ret void
}
-define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
+define void @is_shared_requires_queue_ptr(ptr %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -345,7 +345,7 @@ define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
ret void
}
-define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
+define void @is_private_requires_queue_ptr(ptr %ptr) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: is_private_requires_queue_ptr:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -363,7 +363,7 @@ define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
ret void
}
-define void @trap_requires_queue() #0 {
+define void @trap_requires_queue() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: trap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -375,7 +375,7 @@ define void @trap_requires_queue() #0 {
unreachable
}
-define void @debugtrap_requires_queue() #0 {
+define void @debugtrap_requires_queue() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; FIXEDABI-LABEL: debugtrap_requires_queue:
; FIXEDABI: ; %bb.0:
; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -399,7 +399,5 @@ declare i1 @llvm.amdgcn.is.private(ptr)
declare void @llvm.trap()
declare void @llvm.debugtrap()
-attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
index be4d6a2c278957..0fe8e6e5132394 100644
--- a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
+++ b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
@@ -14,7 +14,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-NEXT: s_nop 2
; GCN-NOT: v_accvgpr_read
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_mfma_store16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_mfma_store16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -33,7 +33,7 @@ bb:
; GCN-NEXT: s_nop 2
; GCN-NOT: v_accvgpr_read
; GCN-NEXT: global_store_dword v{{[0-9:]+}}, a[[N]], s[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load1_mfma_store1(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load1_mfma_store1(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %tid
@@ -52,7 +52,7 @@ bb:
; GCN-NEXT: s_nop 4
; GCN-NOT: v_accvgpr_read
; GCN-NEXT: global_store_dwordx4 v{{[0-9:]+}}, [[A]], s[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load4_mfma_store4(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load4_mfma_store4(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %tid
@@ -66,7 +66,7 @@ bb:
; GCN-COUNT-8: global_load_dwordx4 v[{{[0-9:]+}}], v{{[0-9:]+}}, s[{{[0-9:]+}}]
; GCN-NOT: v_accvgpr
; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -85,7 +85,7 @@ bb:
; GCN-NEXT: s_nop 2
; GCN-NOT: v_accvgpr_read
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_add_mfma_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_add_mfma_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -102,7 +102,7 @@ bb:
; GCN-COUNT-16: v_pk_add_f32
; GCN-NOT: v_accvgpr
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_add_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_add_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -119,7 +119,7 @@ bb:
; GCN-COUNT-32: v_accvgpr_read
; GCN: v_pk_add_f32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_mfma_add_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_mfma_add_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -138,7 +138,7 @@ bb:
; GCN-COUNT-32: v_accvgpr_read
; GCN: v_pk_mul_f32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_load_add_mfma_mul_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_load_add_mfma_mul_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -157,7 +157,7 @@ bb:
; GCN-COUNT-32: v_accvgpr_read
; GCN: v_pk_mul_f32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mixeduse_load_add_mfma_mul_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mixeduse_load_add_mfma_mul_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -175,7 +175,7 @@ bb:
; GCN: v_mfma_f32_32x32x1f32
; GCN-NOT: v_accvgpr_read
; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_multiuse_load_mfma_mfma_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_multiuse_load_mfma_mfma_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -199,7 +199,7 @@ bb:
; GCN: v_accvgpr_read_b32 [[V:v[0-9]+]], a[[N]]{{$}}
; GCN: global_atomic_add v{{[0-9]+}}, v{{[0-9:]+}}, [[V]], s[{{[0-9:]+}}] glc
; GCN: global_store_dword v{{[0-9]+}}, v{{[0-9]+}},
-define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %tid
@@ -222,7 +222,7 @@ bb:
; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
; GCN: v_accvgpr_read_b32 v{{[0-9]+}}, a{{[0-9]+}}
; GCN: global_atomic_add_x2 v[{{[0-9:]+}}], v{{[0-9:]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] glc
-define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic64_store(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_atomic_mfma_4xi32_atomic64_store(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
@@ -249,7 +249,7 @@ bb:
; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: v_accvgpr_read
; GCN: ds_write_b32 v{{[0-9]+}}, a[[N]] offset:128
-define amdgpu_kernel void @test_load_mfma_ds2_store(ptr addrspace(3) %arg) #0 {
+define amdgpu_kernel void @test_load_mfma_ds2_store(ptr addrspace(3) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.1 = getelementptr inbounds <4 x i32>, ptr addrspace(3) %arg, i32 %tid
@@ -268,7 +268,7 @@ bb:
; GCN: v_mfma_i32_4x4x4i8 [[RES:a\[[0-9:]+\]]], v{{[0-9:]+}}, v{{[0-9:]+}}, [[IN]]
; GCN-NOT: v_accvgpr_read
; GCN: global_store_dwordx4 v[{{[0-9:]+}}], [[RES]],
-define amdgpu_kernel void @test_mfma_loop_4xi32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_4xi32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %tid
@@ -295,7 +295,7 @@ exit:
; GCN-NOT: v_accvgpr_read
; GCN-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}],
; GCN: s_endpgm
-define amdgpu_kernel void @test_mfma_loop_32xfloat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_32xfloat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -314,5 +314,3 @@ exit:
store <32 x float> %mai.1, ptr addrspace(1) %gep
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
index 63b8cb6ffcaae5..38086129a83a0f 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
@@ -4,46 +4,44 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -run-pass postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX940 %s
--- |
- define amdgpu_kernel void @a_to_v() #0 { ret void }
- define amdgpu_kernel void @a2_to_v2() #0 { ret void }
- define amdgpu_kernel void @a3_to_v3() #0 { ret void }
- define amdgpu_kernel void @a4_to_v4() #0 { ret void }
- define amdgpu_kernel void @a8_to_v8() #0 { ret void }
- define amdgpu_kernel void @a16_to_v16() #0 { ret void }
+ define amdgpu_kernel void @a_to_v() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a2_to_v2() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a3_to_v3() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a4_to_v4() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a8_to_v8() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a16_to_v16() "amdgpu-flat-work-group-size"="1,256" { ret void }
- define amdgpu_kernel void @v_to_a() #0 { ret void }
- define amdgpu_kernel void @v2_to_a2() #0 { ret void }
- define amdgpu_kernel void @v3_to_a3() #0 { ret void }
- define amdgpu_kernel void @v4_to_a4() #0 { ret void }
- define amdgpu_kernel void @v8_to_a8() #0 { ret void }
- define amdgpu_kernel void @v16_to_a16() #0 { ret void }
+ define amdgpu_kernel void @v_to_a() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @v2_to_a2() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @v3_to_a3() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @v4_to_a4() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @v8_to_a8() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @v16_to_a16() "amdgpu-flat-work-group-size"="1,256" { ret void }
- define amdgpu_kernel void @s_to_a() #0 { ret void }
- define amdgpu_kernel void @s2_to_a2() #0 { ret void }
- define amdgpu_kernel void @s3_to_a3() #0 { ret void }
- define amdgpu_kernel void @s4_to_a4() #0 { ret void }
- define amdgpu_kernel void @s6_to_a6() #0 { ret void }
- define amdgpu_kernel void @s8_to_a8() #0 { ret void }
- define amdgpu_kernel void @s16_to_a16() #0 { ret void }
+ define amdgpu_kernel void @s_to_a() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s2_to_a2() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s3_to_a3() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s4_to_a4() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s6_to_a6() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s8_to_a8() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @s16_to_a16() "amdgpu-flat-work-group-size"="1,256" { ret void }
- define amdgpu_kernel void @a_to_a() #0 { ret void }
- define amdgpu_kernel void @a2_to_a2() #0 { ret void }
- define amdgpu_kernel void @a2_to_a2_kill() #0 { ret void }
- define amdgpu_kernel void @a2_to_a2_implicit_defs() #0 { ret void }
- define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() #0 { ret void }
- define amdgpu_kernel void @a3_to_a3_overlap_kill() #0 { ret void }
- define amdgpu_kernel void @a4_to_a4() #0 { ret void }
- define amdgpu_kernel void @a4_to_a4_overlap() #0 { ret void }
- define amdgpu_kernel void @a8_to_a8() #0 { ret void }
- define amdgpu_kernel void @a16_to_a16() #0 { ret void }
+ define amdgpu_kernel void @a_to_a() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a2_to_a2() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a2_to_a2_kill() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a2_to_a2_implicit_defs() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a3_to_a3_overlap_kill() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a4_to_a4() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a4_to_a4_overlap() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a8_to_a8() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @a16_to_a16() "amdgpu-flat-work-group-size"="1,256" { ret void }
- define amdgpu_kernel void @a_to_a_spill() #0 { ret void }
- define amdgpu_kernel void @copy_sgpr_to_agpr_tuple() #0 { ret void }
- define amdgpu_kernel void @copy_sgpr_to_agpr_tuple_kill() #0 { ret void }
- define amdgpu_kernel void @copy_agpr_to_agpr_tuple() #0 { ret void }
- define amdgpu_kernel void @copy_agpr_to_agpr_tuple_kill() #0 { ret void }
-
- attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+ define amdgpu_kernel void @a_to_a_spill() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @copy_sgpr_to_agpr_tuple() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @copy_sgpr_to_agpr_tuple_kill() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @copy_agpr_to_agpr_tuple() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @copy_agpr_to_agpr_tuple_kill() "amdgpu-flat-work-group-size"="1,256" { ret void }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/add.i16.ll b/llvm/test/CodeGen/AMDGPU/add.i16.ll
index 417ff54e565684..035eac006ed5d6 100644
--- a/llvm/test/CodeGen/AMDGPU/add.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.i16.ll
@@ -6,7 +6,7 @@
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_add_i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_add_i16(ptr addrspace(1) %out, ptr addrspace(1
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_add_i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -38,7 +38,7 @@ define amdgpu_kernel void @v_test_add_i16_constant(ptr addrspace(1) %out, ptr ad
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xfcb3, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_add_i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -53,7 +53,7 @@ define amdgpu_kernel void @v_test_add_i16_neg_constant(ptr addrspace(1) %out, pt
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], -1, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_add_i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -69,7 +69,7 @@ define amdgpu_kernel void @v_test_add_i16_inline_neg1(ptr addrspace(1) %out, ptr
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_dword [[ADD]]
-define amdgpu_kernel void @v_test_add_i16_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_i16_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -88,7 +88,7 @@ define amdgpu_kernel void @v_test_add_i16_zext_to_i32(ptr addrspace(1) %out, ptr
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
; VI: buffer_store_dwordx2 v[[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @v_test_add_i16_zext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_i16_zext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -108,7 +108,7 @@ define amdgpu_kernel void @v_test_add_i16_zext_to_i64(ptr addrspace(1) %out, ptr
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
-define amdgpu_kernel void @v_test_add_i16_sext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_i16_sext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -129,7 +129,7 @@ define amdgpu_kernel void @v_test_add_i16_sext_to_i32(ptr addrspace(1) %out, ptr
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_test_add_i16_sext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_i16_sext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -142,7 +142,4 @@ define amdgpu_kernel void @v_test_add_i16_sext_to_i64(ptr addrspace(1) %out, ptr
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/add.ll b/llvm/test/CodeGen/AMDGPU/add.ll
index 422e2747094ce2..2da2718c526ec1 100644
--- a/llvm/test/CodeGen/AMDGPU/add.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.ll
@@ -6,7 +6,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
-define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: s_add_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -768,7 +768,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_add_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -879,7 +879,7 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_add_imm_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1424,7 +1424,4 @@ define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index 6f67ce4de9ce54..1c5aea7bb9340e 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -6,7 +6,7 @@
; FIXME: Need to handle non-uniform case for function below (load without gep).
; FIXME: VI or should be unnecessary
-define amdgpu_kernel void @v_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; VI-LABEL: v_test_add_v2i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -90,7 +90,7 @@ define amdgpu_kernel void @v_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @s_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0, ptr addrspace(4) %in1) #1 {
+define amdgpu_kernel void @s_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0, ptr addrspace(4) %in1) nounwind {
; VI-LABEL: s_test_add_v2i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -162,7 +162,7 @@ define amdgpu_kernel void @s_test_add_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @s_test_add_self_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0) #1 {
+define amdgpu_kernel void @s_test_add_self_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0) nounwind {
; VI-LABEL: s_test_add_self_v2i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -223,7 +223,7 @@ define amdgpu_kernel void @s_test_add_self_v2i16(ptr addrspace(1) %out, ptr addr
}
; FIXME: VI should not scalarize arg access.
-define amdgpu_kernel void @s_test_add_v2i16_kernarg(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #1 {
+define amdgpu_kernel void @s_test_add_v2i16_kernarg(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind {
; VI-LABEL: s_test_add_v2i16_kernarg:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -276,7 +276,7 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(ptr addrspace(1) %out, <2 x
}
; FIXME: Eliminate or with sdwa
-define amdgpu_kernel void @v_test_add_v2i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; VI-LABEL: v_test_add_v2i16_constant:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -343,7 +343,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(ptr addrspace(1) %out, ptr
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_add_v2i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; VI-LABEL: v_test_add_v2i16_neg_constant:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -409,7 +409,7 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; VI-LABEL: v_test_add_v2i16_inline_neg1:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -474,7 +474,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; VI-LABEL: v_test_add_v2i16_inline_lo_zero_hi:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -539,7 +539,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(ptr addrspace(1) %
}
; The high element gives fp
-define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; VI-LABEL: v_test_add_v2i16_inline_fp_split:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -604,7 +604,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; VI-LABEL: v_test_add_v2i16_zext_to_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -696,7 +696,7 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; VI-LABEL: v_test_add_v2i16_zext_to_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -792,7 +792,7 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; VI-LABEL: v_test_add_v2i16_sext_to_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -886,7 +886,7 @@ define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i32(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; VI-LABEL: v_test_add_v2i16_sext_to_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1049,7 +1049,4 @@ define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
ret <2 x i16> %y
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
index 4c98a1fba6e806..03c69515d4040b 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-captured.ll
@@ -3,14 +3,14 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
-declare void @consume_ptr2int(i32) #0
+declare void @consume_ptr2int(i32) nounwind
; CHECK-LABEL: @addrspacecast_captured(
; CHECK: %data = alloca i32, align 4, addrspace(5)
; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
; CHECK: %ptr2int = ptrtoint ptr %cast to i32
; CHECK: store i32 %ptr2int, ptr addrspace(1) %out
-define amdgpu_kernel void @addrspacecast_captured(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @addrspacecast_captured(ptr addrspace(1) %out) nounwind {
entry:
%data = alloca i32, align 4, addrspace(5)
%cast = addrspacecast ptr addrspace(5) %data to ptr
@@ -23,7 +23,7 @@ entry:
; CHECK: %data = alloca i32, align 4, addrspace(5)
; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
; CHECK: store ptr %cast, ptr addrspace(1) %out
-define amdgpu_kernel void @addrspacecast_captured_store(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @addrspacecast_captured_store(ptr addrspace(1) %out) nounwind {
entry:
%data = alloca i32, align 4, addrspace(5)
%cast = addrspacecast ptr addrspace(5) %data to ptr
@@ -36,7 +36,7 @@ entry:
; CHECK: %cast = addrspacecast ptr addrspace(5) %data to ptr
; CHECK: %ptr2int = ptrtoint ptr %cast to i32
; CHECK: call void @consume_ptr2int(i32 %ptr2int)
-define amdgpu_kernel void @addrspacecast_captured_call() #0 {
+define amdgpu_kernel void @addrspacecast_captured_call() nounwind {
entry:
%data = alloca i32, align 4, addrspace(5)
%cast = addrspacecast ptr addrspace(5) %data to ptr
@@ -44,5 +44,3 @@ entry:
call void @consume_ptr2int(i32 %ptr2int)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index cff9ce05066793..211df545287e4c 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -2,7 +2,7 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
-declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0
+declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) argmemonly nounwind
@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@@ -16,7 +16,7 @@ declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4)
; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
;.
-define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
+define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() nounwind {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
; HSA-SAME: () #[[ATTR1:[0-9]+]] {
; HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 4
@@ -26,7 +26,7 @@ define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
ret void
}
-define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
+define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
@@ -41,7 +41,7 @@ define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
ret void
}
-define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
+define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
@@ -56,7 +56,7 @@ define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
ret void
}
-define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
+define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
@@ -71,7 +71,7 @@ define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
ret void
}
-define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
+define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() nounwind {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)), align 4
@@ -81,7 +81,7 @@ define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
ret void
}
-define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
+define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() nounwind {
; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
; HSA-SAME: () #[[ATTR1]] {
; HSA-NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8), align 4
@@ -91,7 +91,7 @@ define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
ret void
}
-define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
@@ -109,7 +109,7 @@ define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace
ret void
}
-define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
@@ -127,7 +127,7 @@ define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addr
ret void
}
-define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
@@ -148,7 +148,7 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrsp
ret void
}
-define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
@@ -164,7 +164,7 @@ define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspa
}
; Can't just search the pointer value
-define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
@@ -180,7 +180,7 @@ define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addr
}
; Can't just search pointer types
-define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
@@ -196,7 +196,7 @@ define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
}
; Cast group to flat, do GEP, cast back to group
-define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
+define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
@@ -211,7 +211,7 @@ define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #
ret void
}
-define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
+define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
@@ -223,9 +223,6 @@ define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
}
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
index 4e0fc580afdd05..ee0ffb48d2758d 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.gfx6.ll
@@ -106,7 +106,7 @@ define ptr @constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) {
ret ptr %stof
}
-define ptr @constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #0 {
+define ptr @constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) "amdgpu-32bit-address-high-bits"="0xffff8000" {
; CHECK-LABEL: constant32bit_to_flat_addrspacecast_1:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -127,7 +127,7 @@ define ptr @constant32bit_to_flat_addrspacecast_null() {
ret ptr %stof
}
-define ptr @constant32bit_to_flat_addrspacecast_undef() #0 {
+define ptr @constant32bit_to_flat_addrspacecast_undef() "amdgpu-32bit-address-high-bits"="0xffff8000" {
; SDAG-LABEL: constant32bit_to_flat_addrspacecast_undef:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -142,7 +142,7 @@ define ptr @constant32bit_to_flat_addrspacecast_undef() #0 {
ret ptr %stof
}
-define ptr @constant32bit_to_flat_addrspacecast_poison() #0 {
+define ptr @constant32bit_to_flat_addrspacecast_poison() "amdgpu-32bit-address-high-bits"="0xffff8000" {
; SDAG-LABEL: constant32bit_to_flat_addrspacecast_poison:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -157,7 +157,7 @@ define ptr @constant32bit_to_flat_addrspacecast_poison() #0 {
ret ptr %stof
}
-define ptr @constant32bit_to_flat_addrspacecast_constant() #0 {
+define ptr @constant32bit_to_flat_addrspacecast_constant() "amdgpu-32bit-address-high-bits"="0xffff8000" {
; CHECK-LABEL: constant32bit_to_flat_addrspacecast_constant:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -205,7 +205,5 @@ define ptr addrspace(6) @addrspacecast_flat_null_to_constant32bit() {
ret ptr addrspace(6) addrspacecast (ptr null to ptr addrspace(6))
}
-attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 50423c59eabe94..c1c17c61334d7e 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -29,7 +29,7 @@
; number SGPR.
; HSA: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
store volatile i32 7, ptr %stof
ret void
@@ -53,7 +53,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
-define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
+define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
store volatile i32 7, ptr %stof
ret void
@@ -85,7 +85,7 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; HSA: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
store volatile i32 7, ptr %stof
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
; HSA: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
store volatile i32 7, ptr %stof
ret void
@@ -113,7 +113,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
-define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
%ld = load volatile i32, ptr %stof
ret void
@@ -127,7 +127,7 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, [[ZERO:v[0-9]+]], s[[[PTRLO]]:[[PTRHI]]]
-define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(1)
%ld = load volatile i32, ptr addrspace(1) %stof
ret void
@@ -150,7 +150,7 @@ define amdgpu_kernel void @use_constant_to_global_addrspacecast(ptr addrspace(4)
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) nounwind {
%ftos = addrspacecast ptr %ptr to ptr addrspace(3)
store volatile i32 0, ptr addrspace(3) %ftos
ret void
@@ -176,7 +176,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #0 {
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) nounwind {
%ftos = addrspacecast ptr %ptr to ptr addrspace(5)
store volatile i32 0, ptr addrspace(5) %ftos
ret void
@@ -194,7 +194,7 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #0 {
; GFX9: global_store_dword [[ZERO]], [[ZERO]], s[[[PTRLO]]:[[PTRHI]]{{\]$}}
; HSA: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) nounwind {
%ftos = addrspacecast ptr %ptr to ptr addrspace(1)
store volatile i32 0, ptr addrspace(1) %ftos
ret void
@@ -206,7 +206,7 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #0 {
; HSA: s_load_dword s{{[0-9]+}}, s[[[PTRLO]]:[[PTRHI]]], 0x0
; HSA: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) nounwind {
%ftos = addrspacecast ptr %ptr to ptr addrspace(4)
load volatile i32, ptr addrspace(4) %ftos
ret void
@@ -221,7 +221,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
-define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
+define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() nounwind {
%cast = addrspacecast ptr addrspace(3) null to ptr
store volatile i32 7, ptr %cast
ret void
@@ -231,7 +231,7 @@ define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
-define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
+define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() nounwind {
%cast = addrspacecast ptr null to ptr addrspace(3)
store volatile i32 7, ptr addrspace(3) %cast
ret void
@@ -242,7 +242,7 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
-define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
+define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() nounwind {
%cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
store volatile i32 7, ptr %cast
ret void
@@ -252,7 +252,7 @@ define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
-define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
+define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() nounwind {
%cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(3)
store volatile i32 7, ptr addrspace(3) %cast
ret void
@@ -268,7 +268,7 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
-define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
+define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() nounwind {
%cast = addrspacecast ptr addrspace(5) null to ptr
store volatile i32 7, ptr %cast
ret void
@@ -278,7 +278,7 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
-define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
+define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() nounwind {
%cast = addrspacecast ptr null to ptr addrspace(5)
store volatile i32 7, ptr addrspace(5) %cast
ret void
@@ -294,7 +294,7 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
+define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() nounwind {
%cast = addrspacecast ptr addrspace(5) inttoptr (i32 -1 to ptr addrspace(5)) to ptr
store volatile i32 7, ptr %cast
ret void
@@ -304,7 +304,7 @@ define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
-define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
+define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() nounwind {
%cast = addrspacecast ptr inttoptr (i64 -1 to ptr) to ptr addrspace(5)
store volatile i32 7, ptr addrspace(5) %cast
ret void
@@ -317,7 +317,7 @@ define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
; HSA-LABEL: {{^}}branch_use_flat_i32:
; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}
; HSA: s_endpgm
-define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) #0 {
+define amdgpu_kernel void @branch_use_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 %x, i32 %c) nounwind {
entry:
%cmp = icmp ne i32 %c, 0
br i1 %cmp, label %local, label %global
@@ -350,14 +350,14 @@ end:
; HSA: {{flat|global}}_store_dword
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
-define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
+define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) nounwind {
%alloca = alloca i32, i32 9, align 4, addrspace(5)
- %x = call i32 @llvm.amdgcn.workitem.id.x() #2
+ %x = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%pptr = getelementptr i32, ptr addrspace(5) %alloca, i32 %x
%fptr = addrspacecast ptr addrspace(5) %pptr to ptr
store volatile i32 %x, ptr %fptr
; Dummy call
- call void @llvm.amdgcn.s.barrier() #1
+ call void @llvm.amdgcn.s.barrier() nounwind convergent
%reload = load volatile i32, ptr %fptr, align 4
store volatile i32 %reload, ptr addrspace(1) %out, align 4
ret void
@@ -370,7 +370,7 @@ define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
-define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
+define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) nounwind {
%ptr = load volatile ptr addrspace(4), ptr addrspace(4) %ptr.ptr
%addrspacecast = addrspacecast ptr addrspace(4) %ptr to ptr addrspace(6)
%gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
@@ -385,7 +385,7 @@ define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(ptr addrspac
; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}}
; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]]
; GFX9: s_load_dword s{{[0-9]+}}, s[[[PTR_LO]]:[[PTR_HI]]], 0x0{{$}}
-define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) #0 {
+define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(4) %ptr.ptr, i32 %offset) nounwind {
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) %ptr.ptr
%addrspacecast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(6)
%gep = getelementptr i8, ptr addrspace(6) %addrspacecast, i32 %offset
@@ -398,7 +398,7 @@ define amdgpu_kernel void @use_global_to_constant32_addrspacecast(ptr addrspace(
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) #0 {
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspace(6) %ptr) nounwind {
%stof = addrspacecast ptr addrspace(6) %ptr to ptr
%load = load volatile i32, ptr %stof
ret void
@@ -409,19 +409,14 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(ptr addrspa
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
; GCN: flat_load_dword v{{[0-9]+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) #3 {
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspace(6) %ptr) nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" {
%stof = addrspacecast ptr addrspace(6) %ptr to ptr
%load = load volatile i32, ptr %stof
ret void
}
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind convergent }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index ec4e2dda10d3a6..aab6bf17b3c065 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -5,7 +5,7 @@
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
+define amdgpu_ps void @adjust_writemask_crash_0_nochain() nounwind {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -21,7 +21,7 @@ main_body:
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
+define amdgpu_ps void @adjust_writemask_crash_1_nochain() nounwind {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -37,7 +37,7 @@ main_body:
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
+define amdgpu_ps void @adjust_writemask_crash_0_chain() nounwind {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -53,7 +53,7 @@ main_body:
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
+define amdgpu_ps void @adjust_writemask_crash_1_chain() nounwind {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -64,7 +64,7 @@ main_body:
ret void
}
-define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
+define amdgpu_ps void @adjust_writemask_crash_0_v4() nounwind {
main_body:
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
@@ -76,9 +76,6 @@ main_body:
}
-declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 60f61a67ccf0be..86473dab57e9d2 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -4,7 +4,7 @@
; This testcase would fail on GFX908 due to not having a free VGPR available to
; copy between AGPRs.
-define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
+define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) "amdgpu-waves-per-eu"="6,6" {
; GFX908-LABEL: no_free_vgprs_at_agpr_to_agpr_copy:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -242,7 +242,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
}
; Check that we do make use of v32 if there are no AGPRs present in the function
-define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) "amdgpu-waves-per-eu"="6,6" {
; GFX908-LABEL: no_agpr_no_reserve:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -369,7 +369,7 @@ define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
; FIXME: This case is broken. The asm value passed in v32 is live
; through the range where the reserved def for the copy is introduced,
; clobbering the user value.
-define void @v32_asm_def_use(float %v0, float %v1) #0 {
+define void @v32_asm_def_use(float %v0, float %v1) "amdgpu-waves-per-eu"="6,6" {
; GFX908-LABEL: v32_asm_def_use:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -510,7 +510,7 @@ define void @v32_asm_def_use(float %v0, float %v1) #0 {
ret void
}
-define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5) #3 {
+define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5) "amdgpu-waves-per-eu"="7,7" {
; GFX908-LABEL: introduced_copy_to_sgpr:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
@@ -910,7 +910,7 @@ bb58: ; preds = %bb51, %bb16
; This testcase would fail on GFX908 due to not having a free VGPR available to
; copy SGPR to AGPR.
-define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
+define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) "amdgpu-waves-per-eu"="6,6" {
; GFX908-LABEL: no_free_vgprs_at_sgpr_to_agpr_copy:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1149,10 +1149,5 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
ret void
}
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
-attributes #1 = { convergent nounwind readnone willreturn }
-attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
index 73ccab64a19254..316fb97e7cccf8 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-csr.ll
@@ -5,7 +5,7 @@
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
-define void @func_empty() #0 {
+define void @func_empty() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
ret void
}
@@ -16,7 +16,7 @@ define void @func_empty() #0 {
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
-define void @func_areg_4() #0 {
+define void @func_areg_4() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void asm sideeffect "; use agpr3", "~{a3}" ()
ret void
}
@@ -28,7 +28,7 @@ define void @func_areg_4() #0 {
; GCN-NOT: buffer_
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
-define void @func_areg_32() #0 {
+define void @func_areg_32() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void asm sideeffect "; use agpr31", "~{a31}" ()
ret void
}
@@ -42,7 +42,7 @@ define void @func_areg_32() #0 {
; GFX90A: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
; GCN-NOT: a32
; GCN: s_setpc_b64
-define void @func_areg_33() #0 {
+define void @func_areg_33() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void asm sideeffect "; use agpr32", "~{a32}" ()
ret void
}
@@ -55,7 +55,7 @@ define void @func_areg_33() #0 {
; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
; GCN-NOT: v_accvgpr
; GCN: s_setpc_b64
-define void @func_areg_64() #0 {
+define void @func_areg_64() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void asm sideeffect "; use agpr63", "~{a63}" ()
ret void
}
@@ -70,12 +70,12 @@ define void @func_areg_64() #0 {
; GFX908-NOT: v_accvgpr
; GFX908-NOT: buffer
; GCN: s_setpc_b64
-define void @func_areg_31_63() #0 {
+define void @func_areg_31_63() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
ret void
}
-declare void @func_unknown() #0
+declare void @func_unknown() nounwind noinline "amdgpu-flat-work-group-size"="1,512"
; GCN-LABEL: {{^}}test_call_empty:
; GCN-NOT: buffer_
@@ -90,7 +90,7 @@ declare void @func_unknown() #0
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_empty() #0 {
+define amdgpu_kernel void @test_call_empty() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_empty()
@@ -112,7 +112,7 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_areg4() #0 {
+define amdgpu_kernel void @test_call_areg4() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_4()
@@ -134,7 +134,7 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_areg32() #0 {
+define amdgpu_kernel void @test_call_areg32() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_32()
@@ -155,7 +155,7 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_areg64() #0 {
+define amdgpu_kernel void @test_call_areg64() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_64()
@@ -177,7 +177,7 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_areg31_63() #0 {
+define amdgpu_kernel void @test_call_areg31_63() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_areg_31_63()
@@ -199,12 +199,10 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
; GCN: s_endpgm
-define amdgpu_kernel void @test_call_unknown() #0 {
+define amdgpu_kernel void @test_call_unknown() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
%reg = call <32 x float> asm sideeffect "; def $0", "=a"()
call void @func_unknown()
store volatile <32 x float> %reg, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index f84d476fc12271..8790c94cb6dd28 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -16,7 +16,7 @@
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
-define amdgpu_kernel void @kernel_32_agprs() #0 {
+define amdgpu_kernel void @kernel_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -35,7 +35,7 @@ bb:
; GFX908: Occupancy: 10
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 0
-define amdgpu_kernel void @kernel_0_agprs() #0 {
+define amdgpu_kernel void @kernel_0_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
@@ -57,7 +57,7 @@ bb:
; GFX908: Occupancy: 6
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 9
-define amdgpu_kernel void @kernel_40_vgprs() #0 {
+define amdgpu_kernel void @kernel_40_vgprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
@@ -69,7 +69,7 @@ bb:
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
-define void @func_32_agprs() #0 {
+define void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -81,7 +81,7 @@ bb:
; GCN: NumAgprs: 9
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 41
-define void @func_32_vgprs() #0 {
+define void @func_32_vgprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v31}" ()
call void asm sideeffect "", "~{a8}" ()
@@ -92,7 +92,7 @@ bb:
; GCN: NumVgprs: 1
; GCN: NumAgprs: 0
; GCN: TotalNumVgprs: 1
-define amdgpu_kernel void @func_0_agprs() #0 {
+define amdgpu_kernel void @func_0_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
@@ -113,7 +113,7 @@ bb:
; GFX90A: AccumOffset: 256
; GCN: Occupancy: 1
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 63
-define amdgpu_kernel void @kernel_max_gprs() #0 {
+define amdgpu_kernel void @kernel_max_gprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v255}" ()
call void asm sideeffect "", "~{a255}" ()
@@ -134,9 +134,9 @@ bb:
; GFX90A: AccumOffset: 12
; GCN: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 2
-define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
+define amdgpu_kernel void @kernel_call_func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
- call void @func_32_agprs() #0
+ call void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512"
ret void
}
@@ -145,9 +145,9 @@ bb:
; GCN: NumAgprs: 32
; GFX908: TotalNumVgprs: 32
; GFX90A: TotalNumVgprs: 44
-define void @func_call_func_32_agprs() #0 {
+define void @func_call_func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
- call void @func_32_agprs() #0
+ call void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512"
ret void
}
@@ -171,10 +171,8 @@ declare void @undef_func()
; GFX908: Occupancy: 8
; GFX90A: Occupancy: 8
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: 7
-define amdgpu_kernel void @kernel_call_undef_func() #0 {
+define amdgpu_kernel void @kernel_call_undef_func() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void @undef_func()
ret void
}
-
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-remat.ll b/llvm/test/CodeGen/AMDGPU/agpr-remat.ll
index 6742ae6c1d584b..c128c4f933b95c 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-remat.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-remat.ll
@@ -3,7 +3,7 @@
; Make sure there are no v_accvgpr_read_b32 copying back and forth
; between AGPR and VGPR.
-define amdgpu_kernel void @remat_constant_voids_spill(ptr addrspace(1) %p) #1 {
+define amdgpu_kernel void @remat_constant_voids_spill(ptr addrspace(1) %p) nounwind "amdgpu-num-vgpr"="8" {
; GFX908-LABEL: remat_constant_voids_spill:
; GFX908: ; %bb.0:
; GFX908-NEXT: v_accvgpr_write_b32 a1, 1
@@ -25,7 +25,7 @@ define amdgpu_kernel void @remat_constant_voids_spill(ptr addrspace(1) %p) #1 {
ret void
}
-define void @remat_regcopy_avoids_spill(i32 %v0, i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10) #1 {
+define void @remat_regcopy_avoids_spill(i32 %v0, i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10) nounwind "amdgpu-num-vgpr"="8" {
; GFX908-LABEL: remat_regcopy_avoids_spill:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,5 +47,3 @@ define void @remat_regcopy_avoids_spill(i32 %v0, i32 %v1, i32 %v2, i32 %v3, i32
call void asm sideeffect "", "a,a,a,a,a"(i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8)
ret void
}
-
-attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
index fc1326268fd1e7..0ec81458e96830 100644
--- a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
+++ b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -95,6 +95,4 @@ bb:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
index 51b273b909f6d0..140a21bd4b9293 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -7,7 +7,7 @@
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
+define amdgpu_kernel void @kernel_ieee_mode_default() nounwind {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -22,7 +22,7 @@ define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
+define amdgpu_kernel void @kernel_ieee_mode_on() nounwind "amdgpu-ieee"="true" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -37,7 +37,7 @@ define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
-define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
+define amdgpu_kernel void @kernel_ieee_mode_off() nounwind "amdgpu-ieee"="false" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -52,7 +52,7 @@ define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define void @func_ieee_mode_default() #0 {
+define void @func_ieee_mode_default() nounwind {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -67,7 +67,7 @@ define void @func_ieee_mode_default() #0 {
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define void @func_ieee_mode_on() #1 {
+define void @func_ieee_mode_on() nounwind "amdgpu-ieee"="true" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -82,7 +82,7 @@ define void @func_ieee_mode_on() #1 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
-define void @func_ieee_mode_off() #2 {
+define void @func_ieee_mode_off() nounwind "amdgpu-ieee"="false" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -97,7 +97,7 @@ define void @func_ieee_mode_off() #2 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define amdgpu_cs void @cs_ieee_mode_default() #0 {
+define amdgpu_cs void @cs_ieee_mode_default() nounwind {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -112,7 +112,7 @@ define amdgpu_cs void @cs_ieee_mode_default() #0 {
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define amdgpu_cs void @cs_ieee_mode_on() #1 {
+define amdgpu_cs void @cs_ieee_mode_on() nounwind "amdgpu-ieee"="true" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -127,7 +127,7 @@ define amdgpu_cs void @cs_ieee_mode_on() #1 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
-define amdgpu_cs void @cs_ieee_mode_off() #2 {
+define amdgpu_cs void @cs_ieee_mode_off() nounwind "amdgpu-ieee"="false" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -142,7 +142,7 @@ define amdgpu_cs void @cs_ieee_mode_off() #2 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
-define amdgpu_ps void @ps_ieee_mode_default() #0 {
+define amdgpu_ps void @ps_ieee_mode_default() nounwind {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -157,7 +157,7 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
-define amdgpu_ps void @ps_ieee_mode_on() #1 {
+define amdgpu_ps void @ps_ieee_mode_on() nounwind "amdgpu-ieee"="true" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -172,7 +172,7 @@ define amdgpu_ps void @ps_ieee_mode_on() #1 {
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
-define amdgpu_ps void @ps_ieee_mode_off() #2 {
+define amdgpu_ps void @ps_ieee_mode_off() nounwind "amdgpu-ieee"="false" {
%val0 = load volatile float, ptr addrspace(1) undef
%val1 = load volatile float, ptr addrspace(1) undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
@@ -180,9 +180,4 @@ define amdgpu_ps void @ps_ieee_mode_off() #2 {
ret void
}
-declare float @llvm.minnum.f32(float, float) #3
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-ieee"="true" }
-attributes #2 = { nounwind "amdgpu-ieee"="false" }
-attributes #3 = { nounwind readnone speculatable }
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 77976e470fc789..63804e71f5a7d0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -10,9 +10,9 @@
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; SDAG: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR_IMM killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load (s128) from %ir.12, addrspace 4)
; GISEL: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR_IMM %{{[0-9]+}}, %[[OFFSET]], 0, 0 :: (invariant load (<4 x s32>) from {{.*}}, addrspace 4)
-define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
+define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr nounwind "amdgpu-unroll-threshold"="700" {
.entry:
- %5 = call i64 @llvm.amdgcn.s.getpc() #3
+ %5 = call i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
%6 = bitcast i64 %5 to <2 x i32>
%7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0
%8 = bitcast <2 x i32> %7 to i64
@@ -125,7 +125,7 @@ define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset(<4 x i32> inreg %ba
; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr4
; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 77,
-define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nuw(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) #0 {
+define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nuw(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) argmemonly nounwind willreturn {
%off = add nuw i32 %i, 77
%v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
store i32 %v, ptr addrspace(1) %out, align 4
@@ -149,7 +149,7 @@ define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nuw(<4 x i32> inreg
; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
; GISEL-DAG: %[[ADD:.*]]:sreg_32 = nsw S_ADD_I32 %1, %10, implicit-def dead $scc
; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[ADD]], 0,
-define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nsw(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) #0 {
+define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nsw(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) argmemonly nounwind willreturn {
%off = add nsw i32 %i, 77
%v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
store i32 %v, ptr addrspace(1) %out, align 4
@@ -173,7 +173,7 @@ define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_nsw(<4 x i32> inreg
; GISEL-DAG: %[[BASE:.*]]:sgpr_128 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1, %[[BASE2]], %subreg.sub2, %[[BASE3]], %subreg.sub3
; GISEL-DAG: %[[ADD:.*]]:sreg_32 = S_ADD_I32 %1, %10, implicit-def dead $scc
; GISEL: S_BUFFER_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[ADD]], 0,
-define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_noflags(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) #0 {
+define amdgpu_cs void @test_buffer_load_sgpr_plus_imm_offset_noflags(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) argmemonly nounwind willreturn {
%off = add i32 %i, 77
%v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
store i32 %v, ptr addrspace(1) %out, align 4
@@ -205,24 +205,18 @@ define amdgpu_cs void @test_buffer_load_sgpr_or_imm_offset(<4 x i32> inreg %base
ret void
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8) nocapture, i32, i32, i32 immarg) #1
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8) nocapture, i32, i32, i32 immarg) nounwind memory(argmem: write)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #3
+declare i32 @llvm.amdgcn.reloc.constant(metadata) nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare i64 @llvm.amdgcn.s.getpc() #3
+declare i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1
-
-attributes #0 = { argmemonly nounwind willreturn }
-attributes #1 = { nounwind memory(argmem: write) }
-attributes #2 = { nounwind "amdgpu-unroll-threshold"="700" }
-attributes #3 = { nounwind readnone speculatable }
-attributes #4 = { nounwind writeonly }
+declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) nounwind memory(argmem: write)
!llpc.compute.mode = !{!0}
!llpc.options = !{!1}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 33b1cc65dc5699..374992a7d7a288 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -119,7 +119,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
; CHECK-NEXT: ret void
;
- call void @unknown() #0
+ call void @unknown() "amdgpu-no-agpr"
ret void
}
@@ -139,7 +139,7 @@ define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect)
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
; CHECK-NEXT: ret void
;
- call void %indirect() #0
+ call void %indirect() "amdgpu-no-agpr"
ret void
}
@@ -238,9 +238,6 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
call void %fptr()
ret void
}
-
-
-attributes #0 = { "amdgpu-no-agpr" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index d9e04649421820..d5f54e29618bd2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -9,7 +9,7 @@
; RUN: opt -amdgpu-codegenprepare -disable-output %s
-define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) optnone noinline {
; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !0
@@ -4481,8 +4481,6 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare void @llvm.assume(i1 noundef)
-attributes #0 = { optnone noinline }
-
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 5c40a4ce13e31a..24ea31c3d2c27b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -508,6 +508,4 @@ define <2 x half> @multi_use_cast_regression(i1 %cond) {
ret <2 x half> %call
}
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
index 03c84d11936092..411621e163d66f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
@@ -3,7 +3,7 @@
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s
-define amdgpu_kernel void @noop_sqrt_fpmath(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @noop_sqrt_fpmath(ptr addrspace(1) %out, float %x) optnone noinline {
; CHECK-LABEL: define amdgpu_kernel void @noop_sqrt_fpmath
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0
@@ -607,8 +607,6 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare float @llvm.fabs.f32(float)
declare void @llvm.assume(i1 noundef)
-attributes #0 = { optnone noinline }
-
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index b34df3ffca2642..f5d5f62848f09c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -51,7 +51,7 @@ bb:
ret float %call
}
-define void @foo_noinline(ptr addrspace(5) nocapture %p) #0 {
+define void @foo_noinline(ptr addrspace(5) nocapture %p) noinline {
entry:
%tmp1 = load float, ptr addrspace(5) %p, align 4
%mul = fmul float %tmp1, 2.000000e+00
@@ -171,9 +171,6 @@ bb.2:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @_Z3sinf(float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @_Z3sinf(float) nounwind readnone
declare void @forbid_sroa(ptr addrspace(5) nocapture %p)
-
-attributes #0 = { noinline }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
index b8681a0a2ac654..55a984b63b955b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
@@ -6,7 +6,7 @@
; Note: command line argument should override function attribute.
-define amdgpu_ps <4 x float> @sample_2d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #2 {
+define amdgpu_ps <4 x float> @sample_2d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) nounwind readonly "amdgpu-nsa-threshold"="2" {
; ATTRIB-LABEL: sample_2d_nsa2:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -51,7 +51,7 @@ main_body:
ret <4 x float> %v
}
-define amdgpu_ps <4 x float> @sample_3d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #2 {
+define amdgpu_ps <4 x float> @sample_3d_nsa2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) nounwind readonly "amdgpu-nsa-threshold"="2" {
; ATTRIB-LABEL: sample_3d_nsa2:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -96,7 +96,7 @@ main_body:
ret <4 x float> %v
}
-define amdgpu_ps <4 x float> @sample_2d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #3 {
+define amdgpu_ps <4 x float> @sample_2d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) nounwind readonly "amdgpu-nsa-threshold"="3" {
; ATTRIB-LABEL: sample_2d_nsa3:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -141,7 +141,7 @@ main_body:
ret <4 x float> %v
}
-define amdgpu_ps <4 x float> @sample_3d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #3 {
+define amdgpu_ps <4 x float> @sample_3d_nsa3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) nounwind readonly "amdgpu-nsa-threshold"="3" {
; ATTRIB-LABEL: sample_3d_nsa3:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -186,7 +186,7 @@ main_body:
ret <4 x float> %v
}
-define amdgpu_ps <4 x float> @sample_2d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) #4 {
+define amdgpu_ps <4 x float> @sample_2d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) nounwind readonly "amdgpu-nsa-threshold"="4" {
; ATTRIB-LABEL: sample_2d_nsa4:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -231,7 +231,7 @@ main_body:
ret <4 x float> %v
}
-define amdgpu_ps <4 x float> @sample_3d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) #4 {
+define amdgpu_ps <4 x float> @sample_3d_nsa4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) nounwind readonly "amdgpu-nsa-threshold"="4" {
; ATTRIB-LABEL: sample_3d_nsa4:
; ATTRIB: ; %bb.0: ; %main_body
; ATTRIB-NEXT: s_mov_b32 s12, exec_lo
@@ -276,10 +276,5 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readonly "amdgpu-nsa-threshold"="2" }
-attributes #3 = { nounwind readonly "amdgpu-nsa-threshold"="3" }
-attributes #4 = { nounwind readonly "amdgpu-nsa-threshold"="4" }
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
index c5dbfb0f219bd9..f36d7b64509802 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -14,21 +14,17 @@
; ELF-NEXT: Section (3) .rel.text {
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32_LO doff_0_0_b{{$}}
-define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
+define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr nounwind {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
%rcf = bitcast i32 %rc to float
- call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
+ call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) nounwind
ret void
}
; Function Attrs: inaccessiblememonly nounwind
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) inaccessiblememonly nounwind
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { inaccessiblememonly nounwind }
-attributes #2 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.reloc.constant(metadata) nounwind readnone speculatable
!1 = !{!"doff_0_0_b"}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll
index ad543c0d2338ab..5be89d94f896e6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll
@@ -210,7 +210,7 @@ define float @test_ceil_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[CEIL]]
;
- %ceil = tail call float @_Z4ceilf(float %arg) #0
+ %ceil = tail call float @_Z4ceilf(float %arg) nobuiltin
ret float %ceil
}
@@ -220,28 +220,28 @@ define <2 x float> @test_ceil_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[CEIL]]
;
- %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0
+ %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %ceil
}
; "no-builtins" should be ignored
-define float @test_ceil_f32_nobuiltins(float %arg) #1 {
+define float @test_ceil_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_ceil_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[CEIL:%.*]] = tail call float @_Z4ceilf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[CEIL]]
;
- %ceil = tail call float @_Z4ceilf(float %arg) #0
+ %ceil = tail call float @_Z4ceilf(float %arg) nobuiltin
ret float %ceil
}
-define <2 x float> @test_ceil_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_ceil_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_ceil_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CEIL:%.*]] = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[CEIL]]
;
- %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) #0
+ %ceil = tail call <2 x float> @_Z4ceilDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %ceil
}
@@ -286,8 +286,8 @@ define <2 x float> @test_ceil_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @ceilf(float) #2
-declare double @ceil(double) #2
+declare float @ceilf(float) nounwind memory(none)
+declare double @ceil(double) nounwind memory(none)
define float @test_libm_ceil_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_ceil_f32
@@ -309,19 +309,14 @@ define double @test_libm_ceil_f64(double %arg) {
ret double %ceil
}
-define float @test_ceil_f32_strictfp(float %arg) #3 {
+define float @test_ceil_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_ceil_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[CEIL:%.*]] = tail call nnan float @_Z4ceilf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[CEIL]]
;
- %ceil = tail call nnan float @_Z4ceilf(float %arg) #3
+ %ceil = tail call nnan float @_Z4ceilf(float %arg) strictfp
ret float %ceil
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
index 5ab12f53a3b5c3..b8fde228ed6a16 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-copysign.ll
@@ -212,7 +212,7 @@ define <16 x half> @test_copysign_v16f16(<16 x half> %x, <16 x half> %y) {
ret <16 x half> %copysign
}
-define float @test_copysign_f32_minsize(float %x, float %y) #0 {
+define float @test_copysign_f32_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_copysign_f32_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call float @llvm.copysign.f32(float [[X]], float [[Y]])
@@ -222,7 +222,7 @@ define float @test_copysign_f32_minsize(float %x, float %y) #0 {
ret float %copysign
}
-define float @test_copysign_f32_nnan_minsize(float %x, float %y) #0 {
+define float @test_copysign_f32_nnan_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_copysign_f32_nnan_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan float @llvm.copysign.f32(float [[X]], float [[Y]])
@@ -238,7 +238,7 @@ define float @test_copysign_f32_noinline(float %x, float %y) {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[COPYSIGN]]
;
- %copysign = tail call float @_Z8copysignff(float %x, float %y) #1
+ %copysign = tail call float @_Z8copysignff(float %x, float %y) noinline
ret float %copysign
}
@@ -248,17 +248,17 @@ define float @test_copysign_f32_nnan_noinline(float %x, float %y) {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[COPYSIGN]]
;
- %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) #1
+ %copysign = tail call nnan float @_Z8copysignff(float %x, float %y) noinline
ret float %copysign
}
-define float @test_copysign_f32_strictfp(float %x, float %y) #2 {
+define float @test_copysign_f32_strictfp(float %x, float %y) strictfp {
; CHECK-LABEL: define float @test_copysign_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call nnan nsz float @llvm.copysign.f32(float [[X]], float [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[COPYSIGN]]
;
- %copysign = tail call nsz nnan float @_Z8copysignff(float %x, float %y) #2
+ %copysign = tail call nsz nnan float @_Z8copysignff(float %x, float %y) strictfp
ret float %copysign
}
@@ -268,11 +268,6 @@ define float @test_copysign_f32_fast_nobuiltin(float %x, float %y) {
; CHECK-NEXT: [[COPYSIGN:%.*]] = tail call fast float @_Z8copysignff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[COPYSIGN]]
;
- %copysign = tail call fast float @_Z8copysignff(float %x, float %y) #3
+ %copysign = tail call fast float @_Z8copysignff(float %x, float %y) nobuiltin
ret float %copysign
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll
index 0da0acc2030a11..886c6b5bc27f50 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp.ll
@@ -280,7 +280,7 @@ define float @test_exp_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call float @_Z3expf(float %arg) #0, !fpmath !0
+ %exp = tail call float @_Z3expf(float %arg) nobuiltin, !fpmath !0
ret float %exp
}
@@ -290,7 +290,7 @@ define <2 x float> @test_exp_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
- %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %exp
}
@@ -300,7 +300,7 @@ define float @test_exp_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call float @_Z3expf(float %arg) #0
+ %exp = tail call float @_Z3expf(float %arg) nobuiltin
ret float %exp
}
@@ -310,48 +310,48 @@ define <2 x float> @test_exp_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
- %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0
+ %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %exp
}
; "no-builtins" should be ignored
-define float @test_exp_f32_nobuiltins(float %arg) #1 {
+define float @test_exp_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_exp_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call float @_Z3expf(float %arg) #0, !fpmath !0
+ %exp = tail call float @_Z3expf(float %arg) nobuiltin, !fpmath !0
ret float %exp
}
-define <2 x float> @test_exp_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_exp_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_exp_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
- %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %exp
}
-define float @test_exp_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_exp_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_exp_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call float @_Z3expf(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call float @_Z3expf(float %arg) #0
+ %exp = tail call float @_Z3expf(float %arg) nobuiltin
ret float %exp
}
-define <2 x float> @test_exp_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_exp_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_exp_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call <2 x float> @_Z3expDv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[EXP]]
;
- %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) #0
+ %exp = tail call <2 x float> @_Z3expDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %exp
}
@@ -416,8 +416,8 @@ define <2 x float> @test_exp_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @expf(float) #2
-declare double @exp(double) #2
+declare float @expf(float) nounwind memory(none)
+declare double @exp(double) nounwind memory(none)
define float @test_libm_exp_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_exp_f32
@@ -485,11 +485,11 @@ define float @test_exp_f32_fast_noinline(float %arg) {
; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @_Z3expf(float [[ARG]]) #[[ATTR7:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call fast float @_Z3expf(float %arg) #3, !fpmath !0
+ %exp = tail call fast float @_Z3expf(float %arg) noinline, !fpmath !0
ret float %exp
}
-define float @test_exp_f32_fast_optsize(float %arg) #4 {
+define float @test_exp_f32_fast_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_exp_f32_fast_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @llvm.exp.f32(float [[ARG]]), !fpmath !0
@@ -499,7 +499,7 @@ define float @test_exp_f32_fast_optsize(float %arg) #4 {
ret float %exp
}
-define float @test_exp_f32_fast_minsize(float %arg) #5 {
+define float @test_exp_f32_fast_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_exp_f32_fast_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call fast float @llvm.exp.f32(float [[ARG]]), !fpmath !0
@@ -509,7 +509,7 @@ define float @test_exp_f32_fast_minsize(float %arg) #5 {
ret float %exp
}
-define float @test_exp_f32_nsz_contract_optsize(float %arg) #4 {
+define float @test_exp_f32_nsz_contract_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_exp_f32_nsz_contract_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call nsz contract float @llvm.exp.f32(float [[ARG]]), !fpmath !0
@@ -519,7 +519,7 @@ define float @test_exp_f32_nsz_contract_optsize(float %arg) #4 {
ret float %exp
}
-define float @test_exp_f32_nsz_contract_minsize(float %arg) #5 {
+define float @test_exp_f32_nsz_contract_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_exp_f32_nsz_contract_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call nsz contract float @_Z3expf(float [[ARG]]), !fpmath !0
@@ -529,7 +529,7 @@ define float @test_exp_f32_nsz_contract_minsize(float %arg) #5 {
ret float %exp
}
-define half @test_exp_f16_fast_minsize(half %arg) #5 {
+define half @test_exp_f16_fast_minsize(half %arg) minsize {
; CHECK-LABEL: define half @test_exp_f16_fast_minsize
; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call half @_Z3expDh(half [[ARG]])
@@ -539,23 +539,15 @@ define half @test_exp_f16_fast_minsize(half %arg) #5 {
ret half %exp
}
-define float @test_exp_f32_strictfp(float %arg) #6 {
+define float @test_exp_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_exp_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call nsz float @_Z3expf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call nsz float @_Z3expf(float %arg) #6
+ %exp = tail call nsz float @_Z3expf(float %arg) strictfp
ret float %exp
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { noinline }
-attributes #4 = { optsize }
-attributes #5 = { minsize }
-attributes #6 = { strictfp }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
index 96db9c65959d8f..85146af3f8b634 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-exp2.ll
@@ -280,7 +280,7 @@ define float @test_exp2_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[EXP2]]
;
- %exp2 = tail call float @_Z4exp2f(float %arg) #0, !fpmath !0
+ %exp2 = tail call float @_Z4exp2f(float %arg) nobuiltin, !fpmath !0
ret float %exp2
}
@@ -290,7 +290,7 @@ define <2 x float> @test_exp2_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[EXP2]]
;
- %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %exp2
}
@@ -300,7 +300,7 @@ define float @test_exp2_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[EXP2]]
;
- %exp2 = tail call float @_Z4exp2f(float %arg) #0
+ %exp2 = tail call float @_Z4exp2f(float %arg) nobuiltin
ret float %exp2
}
@@ -310,48 +310,48 @@ define <2 x float> @test_exp2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[EXP2]]
;
- %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0
+ %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %exp2
}
; "no-builtins" should be ignored
-define float @test_exp2_f32_nobuiltins(float %arg) #1 {
+define float @test_exp2_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_exp2_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret float [[EXP2]]
;
- %exp2 = tail call float @_Z4exp2f(float %arg) #0, !fpmath !0
+ %exp2 = tail call float @_Z4exp2f(float %arg) nobuiltin, !fpmath !0
ret float %exp2
}
-define <2 x float> @test_exp2_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_exp2_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_exp2_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[EXP2]]
;
- %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %exp2
}
-define float @test_exp2_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_exp2_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_exp2_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call float @_Z4exp2f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[EXP2]]
;
- %exp2 = tail call float @_Z4exp2f(float %arg) #0
+ %exp2 = tail call float @_Z4exp2f(float %arg) nobuiltin
ret float %exp2
}
-define <2 x float> @test_exp2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_exp2_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_exp2_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[EXP2]]
;
- %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) #0
+ %exp2 = tail call <2 x float> @_Z4exp2Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %exp2
}
@@ -416,8 +416,8 @@ define <2 x float> @test_exp2_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @exp2f(float) #2
-declare double @exp2(double) #2
+declare float @exp2f(float) nounwind memory(none)
+declare double @exp2(double) nounwind memory(none)
define float @test_libm_exp2_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_exp2_f32
@@ -485,11 +485,11 @@ define float @test_exp2_f32_fast_noinline(float %arg) {
; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @_Z4exp2f(float [[ARG]]) #[[ATTR7:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[EXP2]]
;
- %exp2 = tail call fast float @_Z4exp2f(float %arg) #3, !fpmath !0
+ %exp2 = tail call fast float @_Z4exp2f(float %arg) noinline, !fpmath !0
ret float %exp2
}
-define float @test_exp2_f32_fast_optsize(float %arg) #4 {
+define float @test_exp2_f32_fast_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_exp2_f32_fast_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[ARG]]), !fpmath !0
@@ -499,7 +499,7 @@ define float @test_exp2_f32_fast_optsize(float %arg) #4 {
ret float %exp2
}
-define float @test_exp2_f32_fast_minsize(float %arg) #5 {
+define float @test_exp2_f32_fast_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_exp2_f32_fast_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call fast float @llvm.exp2.f32(float [[ARG]]), !fpmath !0
@@ -509,7 +509,7 @@ define float @test_exp2_f32_fast_minsize(float %arg) #5 {
ret float %exp2
}
-define float @test_exp2_f32_nsz_contract_optsize(float %arg) #4 {
+define float @test_exp2_f32_nsz_contract_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz contract float @llvm.exp2.f32(float [[ARG]]), !fpmath !0
@@ -519,7 +519,7 @@ define float @test_exp2_f32_nsz_contract_optsize(float %arg) #4 {
ret float %exp2
}
-define float @test_exp2_f32_nsz_contract_minsize(float %arg) #5 {
+define float @test_exp2_f32_nsz_contract_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_exp2_f32_nsz_contract_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call nsz contract float @_Z4exp2f(float [[ARG]]), !fpmath !0
@@ -529,7 +529,7 @@ define float @test_exp2_f32_nsz_contract_minsize(float %arg) #5 {
ret float %exp2
}
-define half @test_exp2_f16_fast_minsize(half %arg) #5 {
+define half @test_exp2_f16_fast_minsize(half %arg) minsize {
; CHECK-LABEL: define half @test_exp2_f16_fast_minsize
; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[EXP2:%.*]] = tail call fast half @llvm.exp2.f16(half [[ARG]])
@@ -539,23 +539,15 @@ define half @test_exp2_f16_fast_minsize(half %arg) #5 {
ret half %exp2
}
-define float @test_exp2_f32_strictfp(float %arg) #6 {
+define float @test_exp2_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_exp2_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[EXP:%.*]] = tail call nsz float @_Z4exp2f(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[EXP]]
;
- %exp = tail call nsz float @_Z4exp2f(float %arg) #6
+ %exp = tail call nsz float @_Z4exp2f(float %arg) strictfp
ret float %exp
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { noinline }
-attributes #4 = { optsize }
-attributes #5 = { minsize }
-attributes #6 = { strictfp }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
index 74b867e93ca18a..367bfdcb36af42 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fabs.ll
@@ -210,7 +210,7 @@ define float @test_fabs_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[FABS]]
;
- %fabs = tail call float @_Z4fabsf(float %arg) #0
+ %fabs = tail call float @_Z4fabsf(float %arg) nobuiltin
ret float %fabs
}
@@ -220,28 +220,28 @@ define <2 x float> @test_fabs_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[FABS]]
;
- %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
+ %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %fabs
}
; "no-builtins" should be ignored
-define float @test_fabs_f32_nobuiltins(float %arg) #1 {
+define float @test_fabs_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_fabs_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FABS:%.*]] = tail call float @_Z4fabsf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[FABS]]
;
- %fabs = tail call float @_Z4fabsf(float %arg) #0
+ %fabs = tail call float @_Z4fabsf(float %arg) nobuiltin
ret float %fabs
}
-define <2 x float> @test_fabs_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_fabs_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_fabs_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FABS:%.*]] = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[FABS]]
;
- %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) #0
+ %fabs = tail call <2 x float> @_Z4fabsDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %fabs
}
@@ -286,8 +286,8 @@ define <2 x float> @test_fabs_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @fabsf(float) #2
-declare double @fabs(double) #2
+declare float @fabsf(float) nounwind memory(none)
+declare double @fabs(double) nounwind memory(none)
define float @test_libm_fabs_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_fabs_f32
@@ -309,19 +309,14 @@ define double @test_libm_fabs_f64(double %arg) {
ret double %fabs
}
-define float @test_fabs_f32_strictfp(float %arg) #3 {
+define float @test_fabs_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_fabs_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[FABS:%.*]] = tail call nnan float @llvm.fabs.f32(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[FABS]]
;
- %fabs = tail call nnan float @_Z4fabsf(float %arg) #3
+ %fabs = tail call nnan float @_Z4fabsf(float %arg) strictfp
ret float %fabs
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll
index 6b3b4cc95d149c..59dfe53baa64a7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-floor.ll
@@ -210,7 +210,7 @@ define float @test_rint_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5floorf(float %arg) #0
+ %rint = tail call float @_Z5floorf(float %arg) nobuiltin
ret float %rint
}
@@ -220,28 +220,28 @@ define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
; "no-builtins" should be ignored
-define float @test_rint_f32_nobuiltins(float %arg) #1 {
+define float @test_rint_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_rint_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5floorf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5floorf(float %arg) #0
+ %rint = tail call float @_Z5floorf(float %arg) nobuiltin
ret float %rint
}
-define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5floorDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5floorDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
@@ -286,8 +286,8 @@ define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @rintf(float) #2
-declare double @rint(double) #2
+declare float @rintf(float) nounwind memory(none)
+declare double @rint(double) nounwind memory(none)
define float @test_libm_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_rint_f32
@@ -309,19 +309,14 @@ define double @test_libm_rint_f64(double %arg) {
ret double %rint
}
-define float @test_rint_f32_strictfp(float %arg) #3 {
+define float @test_rint_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_rint_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z5floorf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call nnan float @_Z5floorf(float %arg) #3
+ %rint = tail call nnan float @_Z5floorf(float %arg) strictfp
ret float %rint
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll
index 93c223f342a1d1..6d4d54a356946b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fma.ll
@@ -218,11 +218,11 @@ define float @test_fma_f32_noinline(float %x, float %y, float %z) {
; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[FMA]]
;
- %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #1
+ %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) noinline
ret float %fma
}
-define float @test_fma_f32_fast_minsize(float %x, float %y, float %z) #0 {
+define float @test_fma_f32_fast_minsize(float %x, float %y, float %z) minsize {
; CHECK-LABEL: define float @test_fma_f32_fast_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z]])
@@ -232,13 +232,13 @@ define float @test_fma_f32_fast_minsize(float %x, float %y, float %z) #0 {
ret float %fma
}
-define float @test_fma_f32_fast_strictfp(float %x, float %y, float %z) #2 {
+define float @test_fma_f32_fast_strictfp(float %x, float %y, float %z) strictfp {
; CHECK-LABEL: define float @test_fma_f32_fast_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[FMA:%.*]] = tail call nnan nsz float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[FMA]]
;
- %fma = tail call nsz nnan float @_Z3fmafff(float %x, float %y, float %z) #2
+ %fma = tail call nsz nnan float @_Z3fmafff(float %x, float %y, float %z) strictfp
ret float %fma
}
@@ -248,11 +248,6 @@ define float @test_fma_f32_fast_nobuiltin(float %x, float %y, float %z) {
; CHECK-NEXT: [[FMA:%.*]] = tail call fast float @_Z3fmafff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[FMA]]
;
- %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) #3
+ %fma = tail call fast float @_Z3fmafff(float %x, float %y, float %z) nobuiltin
ret float %fma
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll
index a6ff5c9984ea84..a485a77e9084e3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax.ll
@@ -212,7 +212,7 @@ define <16 x half> @test_fmax_v16f16(<16 x half> %x, <16 x half> %y) {
ret <16 x half> %fmax
}
-define float @test_fmax_f32_minsize(float %x, float %y) #0 {
+define float @test_fmax_f32_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_fmax_f32_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FMAX:%.*]] = tail call float @llvm.maxnum.f32(float [[X]], float [[Y]])
@@ -222,7 +222,7 @@ define float @test_fmax_f32_minsize(float %x, float %y) #0 {
ret float %fmax
}
-define float @test_fmax_f32_nnan_minsize(float %x, float %y) #0 {
+define float @test_fmax_f32_nnan_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_fmax_f32_nnan_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[X]], float [[Y]])
@@ -238,7 +238,7 @@ define float @test_fmax_f32_noinline(float %x, float %y) {
; CHECK-NEXT: [[FMAX:%.*]] = tail call float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[FMAX]]
;
- %fmax = tail call float @_Z4fmaxff(float %x, float %y) #1
+ %fmax = tail call float @_Z4fmaxff(float %x, float %y) noinline
ret float %fmax
}
@@ -248,17 +248,17 @@ define float @test_fmax_f32_nnan_noinline(float %x, float %y) {
; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[FMAX]]
;
- %fmax = tail call nnan float @_Z4fmaxff(float %x, float %y) #1
+ %fmax = tail call nnan float @_Z4fmaxff(float %x, float %y) noinline
ret float %fmax
}
-define float @test_fmax_f32_strictfp(float %x, float %y) #2 {
+define float @test_fmax_f32_strictfp(float %x, float %y) strictfp {
; CHECK-LABEL: define float @test_fmax_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[FMAX:%.*]] = tail call nnan nsz float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[FMAX]]
;
- %fmax = tail call nsz nnan float @_Z4fmaxff(float %x, float %y) #2
+ %fmax = tail call nsz nnan float @_Z4fmaxff(float %x, float %y) strictfp
ret float %fmax
}
@@ -268,11 +268,6 @@ define float @test_fmax_f32_fast_nobuiltin(float %x, float %y) {
; CHECK-NEXT: [[FMAX:%.*]] = tail call fast float @_Z4fmaxff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[FMAX]]
;
- %fmax = tail call fast float @_Z4fmaxff(float %x, float %y) #3
+ %fmax = tail call fast float @_Z4fmaxff(float %x, float %y) nobuiltin
ret float %fmax
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll
index c9b1112ba39810..f9d14599feb304 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin.ll
@@ -212,7 +212,7 @@ define <16 x half> @test_fmin_v16f16(<16 x half> %x, <16 x half> %y) {
ret <16 x half> %fmin
}
-define float @test_fmin_f32_minsize(float %x, float %y) #0 {
+define float @test_fmin_f32_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_fmin_f32_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FMIN:%.*]] = tail call float @llvm.minnum.f32(float [[X]], float [[Y]])
@@ -222,7 +222,7 @@ define float @test_fmin_f32_minsize(float %x, float %y) #0 {
ret float %fmin
}
-define float @test_fmin_f32_nnan_minsize(float %x, float %y) #0 {
+define float @test_fmin_f32_nnan_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_fmin_f32_nnan_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[X]], float [[Y]])
@@ -238,7 +238,7 @@ define float @test_fmin_f32_noinline(float %x, float %y) {
; CHECK-NEXT: [[FMIN:%.*]] = tail call float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[FMIN]]
;
- %fmin = tail call float @_Z4fminff(float %x, float %y) #1
+ %fmin = tail call float @_Z4fminff(float %x, float %y) noinline
ret float %fmin
}
@@ -248,17 +248,17 @@ define float @test_fmin_f32_nnan_noinline(float %x, float %y) {
; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[FMIN]]
;
- %fmin = tail call nnan float @_Z4fminff(float %x, float %y) #1
+ %fmin = tail call nnan float @_Z4fminff(float %x, float %y) noinline
ret float %fmin
}
-define float @test_fmin_f32_strictfp(float %x, float %y) #2 {
+define float @test_fmin_f32_strictfp(float %x, float %y) strictfp {
; CHECK-LABEL: define float @test_fmin_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[FMIN:%.*]] = tail call nnan nsz float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[FMIN]]
;
- %fmin = tail call nsz nnan float @_Z4fminff(float %x, float %y) #2
+ %fmin = tail call nsz nnan float @_Z4fminff(float %x, float %y) strictfp
ret float %fmin
}
@@ -268,11 +268,6 @@ define float @test_fmin_f32_fast_nobuiltin(float %x, float %y) {
; CHECK-NEXT: [[FMIN:%.*]] = tail call fast float @_Z4fminff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[FMIN]]
;
- %fmin = tail call fast float @_Z4fminff(float %x, float %y) #3
+ %fmin = tail call fast float @_Z4fminff(float %x, float %y) nobuiltin
ret float %fmin
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll
index 24082b8c666111..e57153999cd318 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp.ll
@@ -212,7 +212,7 @@ define <16 x half> @test_ldexp_v16f16(<16 x half> %x, <16 x i32> %y) {
ret <16 x half> %ldexp
}
-define float @test_ldexp_f32_minsize(float %x, i32 %y) #3 {
+define float @test_ldexp_f32_minsize(float %x, i32 %y) minsize {
; CHECK-LABEL: define float @test_ldexp_f32_minsize
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LDEXP:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
@@ -228,22 +228,16 @@ define float @test_ldexp_f32_nobuiltin(float %x, i32 %y) {
; CHECK-NEXT: [[LDEXP:%.*]] = tail call float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[LDEXP]]
;
- %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) #0
+ %ldexp = tail call float @_Z5ldexpfi(float %x, i32 %y) nobuiltin
ret float %ldexp
}
-define float @test_ldexp_f32_strictfp(float %x, i32 %y) #4 {
+define float @test_ldexp_f32_strictfp(float %x, i32 %y) strictfp {
; CHECK-LABEL: define float @test_ldexp_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[LDEXP:%.*]] = tail call nnan float @_Z5ldexpfi(float [[X]], i32 [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[LDEXP]]
;
- %ldexp = tail call nnan float @_Z5ldexpfi(float %x, i32 %y) #4
+ %ldexp = tail call nnan float @_Z5ldexpfi(float %x, i32 %y) strictfp
ret float %ldexp
}
-
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { minsize }
-attributes #4 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll
index ec0b2283aef022..394caf7a5d2e0b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log.ll
@@ -280,7 +280,7 @@ define float @test_log_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call float @_Z3logf(float %arg) #0, !fpmath !0
+ %log = tail call float @_Z3logf(float %arg) nobuiltin, !fpmath !0
ret float %log
}
@@ -290,7 +290,7 @@ define <2 x float> @test_log_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG]]
;
- %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log
}
@@ -300,7 +300,7 @@ define float @test_log_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call float @_Z3logf(float %arg) #0
+ %log = tail call float @_Z3logf(float %arg) nobuiltin
ret float %log
}
@@ -310,48 +310,48 @@ define <2 x float> @test_log_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG]]
;
- %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0
+ %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log
}
; "no-builtins" should be ignored
-define float @test_log_f32_nobuiltins(float %arg) #1 {
+define float @test_log_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call float @_Z3logf(float %arg) #0, !fpmath !0
+ %log = tail call float @_Z3logf(float %arg) nobuiltin, !fpmath !0
ret float %log
}
-define <2 x float> @test_log_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG]]
;
- %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log
}
-define float @test_log_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_log_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call float @_Z3logf(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call float @_Z3logf(float %arg) #0
+ %log = tail call float @_Z3logf(float %arg) nobuiltin
ret float %log
}
-define <2 x float> @test_log_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call <2 x float> @_Z3logDv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG]]
;
- %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) #0
+ %log = tail call <2 x float> @_Z3logDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log
}
@@ -416,8 +416,8 @@ define <2 x float> @test_log_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @logf(float) #2
-declare double @log(double) #2
+declare float @logf(float) nounwind memory(none)
+declare double @log(double) nounwind memory(none)
define float @test_libm_log_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_log_f32
@@ -485,11 +485,11 @@ define float @test_log_f32_fast_noinline(float %arg) {
; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @_Z3logf(float [[ARG]]) #[[ATTR7:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call fast float @_Z3logf(float %arg) #3, !fpmath !0
+ %log = tail call fast float @_Z3logf(float %arg) noinline, !fpmath !0
ret float %log
}
-define float @test_log_f32_fast_optsize(float %arg) #4 {
+define float @test_log_f32_fast_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log_f32_fast_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @llvm.log.f32(float [[ARG]]), !fpmath !0
@@ -499,7 +499,7 @@ define float @test_log_f32_fast_optsize(float %arg) #4 {
ret float %log
}
-define float @test_log_f32_fast_minsize(float %arg) #5 {
+define float @test_log_f32_fast_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log_f32_fast_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call fast float @llvm.log.f32(float [[ARG]]), !fpmath !0
@@ -509,7 +509,7 @@ define float @test_log_f32_fast_minsize(float %arg) #5 {
ret float %log
}
-define float @test_log_f32_nsz_contract_optsize(float %arg) #4 {
+define float @test_log_f32_nsz_contract_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log_f32_nsz_contract_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call nsz contract float @llvm.log.f32(float [[ARG]]), !fpmath !0
@@ -519,7 +519,7 @@ define float @test_log_f32_nsz_contract_optsize(float %arg) #4 {
ret float %log
}
-define float @test_log_f32_nsz_contract_minsize(float %arg) #5 {
+define float @test_log_f32_nsz_contract_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log_f32_nsz_contract_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call nsz contract float @_Z3logf(float [[ARG]]), !fpmath !0
@@ -529,7 +529,7 @@ define float @test_log_f32_nsz_contract_minsize(float %arg) #5 {
ret float %log
}
-define half @test_log_f16_fast_minsize(half %arg) #5 {
+define half @test_log_f16_fast_minsize(half %arg) minsize {
; CHECK-LABEL: define half @test_log_f16_fast_minsize
; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call half @_Z3logDh(half [[ARG]])
@@ -539,23 +539,15 @@ define half @test_log_f16_fast_minsize(half %arg) #5 {
ret half %log
}
-define float @test_log_f32_strictfp(float %arg) #6 {
+define float @test_log_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_log_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call nsz float @_Z3logf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call nsz float @_Z3logf(float %arg) #6
+ %log = tail call nsz float @_Z3logf(float %arg) strictfp
ret float %log
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { noinline }
-attributes #4 = { optsize }
-attributes #5 = { minsize }
-attributes #6 = { strictfp }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll
index 1a03f9c4200995..6d91f8326a9e45 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log10.ll
@@ -280,7 +280,7 @@ define float @test_log10_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG10]]
;
- %log10 = tail call float @_Z5log10f(float %arg) #0, !fpmath !0
+ %log10 = tail call float @_Z5log10f(float %arg) nobuiltin, !fpmath !0
ret float %log10
}
@@ -290,7 +290,7 @@ define <2 x float> @test_log10_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG10]]
;
- %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log10
}
@@ -300,7 +300,7 @@ define float @test_log10_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG10]]
;
- %log10 = tail call float @_Z5log10f(float %arg) #0
+ %log10 = tail call float @_Z5log10f(float %arg) nobuiltin
ret float %log10
}
@@ -310,48 +310,48 @@ define <2 x float> @test_log10_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG10]]
;
- %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0
+ %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log10
}
; "no-builtins" should be ignored
-define float @test_log10_f32_nobuiltins(float %arg) #1 {
+define float @test_log10_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log10_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret float [[LOG10]]
;
- %log10 = tail call float @_Z5log10f(float %arg) #0, !fpmath !0
+ %log10 = tail call float @_Z5log10f(float %arg) nobuiltin, !fpmath !0
ret float %log10
}
-define <2 x float> @test_log10_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log10_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log10_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG10]]
;
- %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log10
}
-define float @test_log10_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_log10_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log10_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call float @_Z5log10f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG10]]
;
- %log10 = tail call float @_Z5log10f(float %arg) #0
+ %log10 = tail call float @_Z5log10f(float %arg) nobuiltin
ret float %log10
}
-define <2 x float> @test_log10_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log10_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log10_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG10]]
;
- %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) #0
+ %log10 = tail call <2 x float> @_Z5log10Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log10
}
@@ -416,8 +416,8 @@ define <2 x float> @test_log10_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @log10f(float) #2
-declare double @log10(double) #2
+declare float @log10f(float) nounwind memory(none)
+declare double @log10(double) nounwind memory(none)
define float @test_libm_log10_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_log10_f32
@@ -485,11 +485,11 @@ define float @test_log10_f32_fast_noinline(float %arg) {
; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @_Z5log10f(float [[ARG]]) #[[ATTR7:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG10]]
;
- %log10 = tail call fast float @_Z5log10f(float %arg) #3, !fpmath !0
+ %log10 = tail call fast float @_Z5log10f(float %arg) noinline, !fpmath !0
ret float %log10
}
-define float @test_log10_f32_fast_optsize(float %arg) #4 {
+define float @test_log10_f32_fast_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log10_f32_fast_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @llvm.log10.f32(float [[ARG]]), !fpmath !0
@@ -499,7 +499,7 @@ define float @test_log10_f32_fast_optsize(float %arg) #4 {
ret float %log10
}
-define float @test_log10_f32_fast_minsize(float %arg) #5 {
+define float @test_log10_f32_fast_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log10_f32_fast_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call fast float @llvm.log10.f32(float [[ARG]]), !fpmath !0
@@ -509,7 +509,7 @@ define float @test_log10_f32_fast_minsize(float %arg) #5 {
ret float %log10
}
-define float @test_log10_f32_nsz_contract_optsize(float %arg) #4 {
+define float @test_log10_f32_nsz_contract_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log10_f32_nsz_contract_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call nsz contract float @llvm.log10.f32(float [[ARG]]), !fpmath !0
@@ -519,7 +519,7 @@ define float @test_log10_f32_nsz_contract_optsize(float %arg) #4 {
ret float %log10
}
-define float @test_log10_f32_nsz_contract_minsize(float %arg) #5 {
+define float @test_log10_f32_nsz_contract_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log10_f32_nsz_contract_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call nsz contract float @_Z5log10f(float [[ARG]]), !fpmath !0
@@ -529,7 +529,7 @@ define float @test_log10_f32_nsz_contract_minsize(float %arg) #5 {
ret float %log10
}
-define half @test_log10_f16_fast_minsize(half %arg) #5 {
+define half @test_log10_f16_fast_minsize(half %arg) minsize {
; CHECK-LABEL: define half @test_log10_f16_fast_minsize
; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG10:%.*]] = tail call fast half @llvm.log10.f16(half [[ARG]])
@@ -539,23 +539,15 @@ define half @test_log10_f16_fast_minsize(half %arg) #5 {
ret half %log10
}
-define float @test_log10_f32_strictfp(float %arg) #6 {
+define float @test_log10_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_log10_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call nsz float @_Z5log10f(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call nsz float @_Z5log10f(float %arg) #6
+ %log = tail call nsz float @_Z5log10f(float %arg) strictfp
ret float %log
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { noinline }
-attributes #4 = { optsize }
-attributes #5 = { minsize }
-attributes #6 = { strictfp }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll
index a6b3265d92d2c2..2eeb1284aabcf0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-log2.ll
@@ -280,7 +280,7 @@ define float @test_log2_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR6:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG2]]
;
- %log2 = tail call float @_Z4log2f(float %arg) #0, !fpmath !0
+ %log2 = tail call float @_Z4log2f(float %arg) nobuiltin, !fpmath !0
ret float %log2
}
@@ -290,7 +290,7 @@ define <2 x float> @test_log2_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG2]]
;
- %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log2
}
@@ -300,7 +300,7 @@ define float @test_log2_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG2]]
;
- %log2 = tail call float @_Z4log2f(float %arg) #0
+ %log2 = tail call float @_Z4log2f(float %arg) nobuiltin
ret float %log2
}
@@ -310,48 +310,48 @@ define <2 x float> @test_log2_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG2]]
;
- %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0
+ %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log2
}
; "no-builtins" should be ignored
-define float @test_log2_f32_nobuiltins(float %arg) #1 {
+define float @test_log2_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log2_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret float [[LOG2]]
;
- %log2 = tail call float @_Z4log2f(float %arg) #0, !fpmath !0
+ %log2 = tail call float @_Z4log2f(float %arg) nobuiltin, !fpmath !0
ret float %log2
}
-define <2 x float> @test_log2_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log2_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log2_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]], !fpmath !0
; CHECK-NEXT: ret <2 x float> [[LOG2]]
;
- %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0, !fpmath !0
+ %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %log2
}
-define float @test_log2_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_log2_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_log2_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call float @_Z4log2f(float [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret float [[LOG2]]
;
- %log2 = tail call float @_Z4log2f(float %arg) #0
+ %log2 = tail call float @_Z4log2f(float %arg) nobuiltin
ret float %log2
}
-define <2 x float> @test_log2_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_log2_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_log2_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> [[ARG]]) #[[ATTR6]]
; CHECK-NEXT: ret <2 x float> [[LOG2]]
;
- %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) #0
+ %log2 = tail call <2 x float> @_Z4log2Dv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %log2
}
@@ -416,8 +416,8 @@ define <2 x float> @test_log2_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @log2f(float) #2
-declare double @log2(double) #2
+declare float @log2f(float) nounwind memory(none)
+declare double @log2(double) nounwind memory(none)
define float @test_libm_log2_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_log2_f32
@@ -485,11 +485,11 @@ define float @test_log2_f32_fast_noinline(float %arg) {
; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @_Z4log2f(float [[ARG]]) #[[ATTR7:[0-9]+]], !fpmath !0
; CHECK-NEXT: ret float [[LOG2]]
;
- %log2 = tail call fast float @_Z4log2f(float %arg) #3, !fpmath !0
+ %log2 = tail call fast float @_Z4log2f(float %arg) noinline, !fpmath !0
ret float %log2
}
-define float @test_log2_f32_fast_optsize(float %arg) #4 {
+define float @test_log2_f32_fast_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log2_f32_fast_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @llvm.log2.f32(float [[ARG]]), !fpmath !0
@@ -499,7 +499,7 @@ define float @test_log2_f32_fast_optsize(float %arg) #4 {
ret float %log2
}
-define float @test_log2_f32_fast_minsize(float %arg) #5 {
+define float @test_log2_f32_fast_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log2_f32_fast_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call fast float @llvm.log2.f32(float [[ARG]]), !fpmath !0
@@ -509,7 +509,7 @@ define float @test_log2_f32_fast_minsize(float %arg) #5 {
ret float %log2
}
-define float @test_log2_f32_nsz_contract_optsize(float %arg) #4 {
+define float @test_log2_f32_nsz_contract_optsize(float %arg) optsize {
; CHECK-LABEL: define float @test_log2_f32_nsz_contract_optsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call nsz contract float @llvm.log2.f32(float [[ARG]]), !fpmath !0
@@ -519,7 +519,7 @@ define float @test_log2_f32_nsz_contract_optsize(float %arg) #4 {
ret float %log2
}
-define float @test_log2_f32_nsz_contract_minsize(float %arg) #5 {
+define float @test_log2_f32_nsz_contract_minsize(float %arg) minsize {
; CHECK-LABEL: define float @test_log2_f32_nsz_contract_minsize
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call nsz contract float @_Z4log2f(float [[ARG]]), !fpmath !0
@@ -529,7 +529,7 @@ define float @test_log2_f32_nsz_contract_minsize(float %arg) #5 {
ret float %log2
}
-define half @test_log2_f16_fast_minsize(half %arg) #5 {
+define half @test_log2_f16_fast_minsize(half %arg) minsize {
; CHECK-LABEL: define half @test_log2_f16_fast_minsize
; CHECK-SAME: (half [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: [[LOG2:%.*]] = tail call fast half @llvm.log2.f16(half [[ARG]])
@@ -539,23 +539,15 @@ define half @test_log2_f16_fast_minsize(half %arg) #5 {
ret half %log2
}
-define float @test_log2_f32_strictfp(float %arg) #6 {
+define float @test_log2_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_log2_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[LOG:%.*]] = tail call nsz float @_Z4log2f(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[LOG]]
;
- %log = tail call nsz float @_Z4log2f(float %arg) #6
+ %log = tail call nsz float @_Z4log2f(float %arg) strictfp
ret float %log
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { noinline }
-attributes #4 = { optsize }
-attributes #5 = { minsize }
-attributes #6 = { strictfp }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll
index ad7402c9f3a848..55a2b9abbe43ce 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-mad.ll
@@ -218,11 +218,11 @@ define float @test_mad_f32_noinline(float %x, float %y, float %z) {
; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret float [[MAD]]
;
- %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #1
+ %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) noinline
ret float %mad
}
-define float @test_mad_f32_fast_minsize(float %x, float %y, float %z) #0 {
+define float @test_mad_f32_fast_minsize(float %x, float %y, float %z) minsize {
; CHECK-LABEL: define float @test_mad_f32_fast_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @llvm.fmuladd.f32(float [[X]], float [[Y]], float [[Z]])
@@ -232,13 +232,13 @@ define float @test_mad_f32_fast_minsize(float %x, float %y, float %z) #0 {
ret float %mad
}
-define float @test_mad_f32_fast_strictfp(float %x, float %y, float %z) #2 {
+define float @test_mad_f32_fast_strictfp(float %x, float %y, float %z) strictfp {
; CHECK-LABEL: define float @test_mad_f32_fast_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[MAD:%.*]] = tail call nnan nsz float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[MAD]]
;
- %mad = tail call nsz nnan float @_Z3madfff(float %x, float %y, float %z) #2
+ %mad = tail call nsz nnan float @_Z3madfff(float %x, float %y, float %z) strictfp
ret float %mad
}
@@ -248,11 +248,6 @@ define float @test_mad_f32_fast_nobuiltin(float %x, float %y, float %z) {
; CHECK-NEXT: [[MAD:%.*]] = tail call fast float @_Z3madfff(float [[X]], float [[Y]], float [[Z]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[MAD]]
;
- %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) #3
+ %mad = tail call fast float @_Z3madfff(float %x, float %y, float %z) nobuiltin
ret float %mad
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index c4bd4bc126f735..955885457d9aac 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -460,7 +460,7 @@ define <16 x half> @test_pow_v16f16(<16 x half> %x, <16 x half> %y) {
ret <16 x half> %pow
}
-define float @test_pow_afn_f32_minsize(float %x, float %y) #0 {
+define float @test_pow_afn_f32_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_pow_afn_f32_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]])
@@ -470,7 +470,7 @@ define float @test_pow_afn_f32_minsize(float %x, float %y) #0 {
ret float %pow
}
-define float @test_pow_afn_f32_nnan_minsize(float %x, float %y) #0 {
+define float @test_pow_afn_f32_nnan_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_pow_afn_f32_nnan_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]])
@@ -486,7 +486,7 @@ define float @test_pow_afn_f32_noinline(float %x, float %y) {
; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: ret float [[POW]]
;
- %pow = tail call afn float @_Z3powff(float %x, float %y) #1
+ %pow = tail call afn float @_Z3powff(float %x, float %y) noinline
ret float %pow
}
@@ -496,17 +496,17 @@ define float @test_pow_afn_f32_nnan_noinline(float %x, float %y) {
; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5]]
; CHECK-NEXT: ret float [[POW]]
;
- %pow = tail call afn nnan float @_Z3powff(float %x, float %y) #1
+ %pow = tail call afn nnan float @_Z3powff(float %x, float %y) noinline
ret float %pow
}
-define float @test_pow_afn_f32_strictfp(float %x, float %y) #2 {
+define float @test_pow_afn_f32_strictfp(float %x, float %y) strictfp {
; CHECK-LABEL: define float @test_pow_afn_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: [[POW:%.*]] = tail call nnan nsz afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[POW]]
;
- %pow = tail call afn nsz nnan float @_Z3powff(float %x, float %y) #2
+ %pow = tail call afn nsz nnan float @_Z3powff(float %x, float %y) strictfp
ret float %pow
}
@@ -516,7 +516,7 @@ define float @test_pow_fast_f32_nobuiltin(float %x, float %y) {
; CHECK-NEXT: [[POW:%.*]] = tail call fast float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: ret float [[POW]]
;
- %pow = tail call fast float @_Z3powff(float %x, float %y) #3
+ %pow = tail call fast float @_Z3powff(float %x, float %y) nobuiltin
ret float %pow
}
@@ -2672,8 +2672,3 @@ define float @test_pow_f32__y_known_integral_roundeven(float %x, float nofpclass
%pow = tail call float @_Z3powff(float %x, float %y)
ret float %pow
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 942f459ea6b8ca..5aa1dd1df8956e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -812,11 +812,11 @@ define float @test_pown_fast_f32_nobuiltin(float %x, i32 %y) {
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call fast float @_Z4pownfi(float %x, i32 %y) #0
+ %call = tail call fast float @_Z4pownfi(float %x, i32 %y) nobuiltin
ret float %call
}
-define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
+define float @test_pown_fast_f32_strictfp(float %x, i32 %y) strictfp {
; CHECK-LABEL: define float @test_pown_fast_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -834,7 +834,7 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: ret float [[TMP3]]
;
entry:
- %call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1
+ %call = tail call fast float @_Z4pownfi(float %x, i32 %y) strictfp
ret float %call
}
@@ -1157,6 +1157,3 @@ entry:
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
ret float %call
}
-
-attributes #0 = { nobuiltin }
-attributes #1 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
index dc4cf1d067ef18..ed9b1ef8f765b7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
@@ -426,7 +426,7 @@ define <16 x half> @test_powr_v16f16(<16 x half> %x, <16 x half> %y) {
ret <16 x half> %powr
}
-define float @test_powr_afn_f32_minsize(float %x, float %y) #0 {
+define float @test_powr_afn_f32_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_powr_afn_f32_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]])
@@ -436,7 +436,7 @@ define float @test_powr_afn_f32_minsize(float %x, float %y) #0 {
ret float %powr
}
-define float @test_powr_afn_f32_nnan_minsize(float %x, float %y) #0 {
+define float @test_powr_afn_f32_nnan_minsize(float %x, float %y) minsize {
; CHECK-LABEL: define float @test_powr_afn_f32_nnan_minsize
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]])
@@ -452,7 +452,7 @@ define float @test_powr_afn_f32_noinline(float %x, float %y) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]
;
- %powr = tail call afn float @_Z4powrff(float %x, float %y) #1
+ %powr = tail call afn float @_Z4powrff(float %x, float %y) noinline
ret float %powr
}
@@ -462,17 +462,17 @@ define float @test_powr_afn_f32_nnan_noinline(float %x, float %y) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[POWR]]
;
- %powr = tail call afn nnan float @_Z4powrff(float %x, float %y) #1
+ %powr = tail call afn nnan float @_Z4powrff(float %x, float %y) noinline
ret float %powr
}
-define float @test_powr_afn_f32_strictfp(float %x, float %y) #2 {
+define float @test_powr_afn_f32_strictfp(float %x, float %y) strictfp {
; CHECK-LABEL: define float @test_powr_afn_f32_strictfp
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan nsz afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR1]]
; CHECK-NEXT: ret float [[POWR]]
;
- %powr = tail call afn nsz nnan float @_Z4powrff(float %x, float %y) #2
+ %powr = tail call afn nsz nnan float @_Z4powrff(float %x, float %y) strictfp
ret float %powr
}
@@ -482,7 +482,7 @@ define float @test_powr_fast_f32_nobuiltin(float %x, float %y) {
; CHECK-NEXT: [[POWR:%.*]] = tail call fast float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]
;
- %powr = tail call fast float @_Z4powrff(float %x, float %y) #3
+ %powr = tail call fast float @_Z4powrff(float %x, float %y) nobuiltin
ret float %powr
}
@@ -1208,8 +1208,3 @@ define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x flo
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
ret <2 x float> %powr
}
-
-attributes #0 = { minsize }
-attributes #1 = { noinline }
-attributes #2 = { strictfp }
-attributes #3 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll
index 534a42b039790b..c22079c2fc2be4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rint.ll
@@ -210,7 +210,7 @@ define float @test_rint_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z4rintf(float %arg) #0
+ %rint = tail call float @_Z4rintf(float %arg) nobuiltin
ret float %rint
}
@@ -220,28 +220,28 @@ define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
; "no-builtins" should be ignored
-define float @test_rint_f32_nobuiltins(float %arg) #1 {
+define float @test_rint_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_rint_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z4rintf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z4rintf(float %arg) #0
+ %rint = tail call float @_Z4rintf(float %arg) nobuiltin
ret float %rint
}
-define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z4rintDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z4rintDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
@@ -286,8 +286,8 @@ define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @rintf(float) #2
-declare double @rint(double) #2
+declare float @rintf(float) nounwind memory(none)
+declare double @rint(double) nounwind memory(none)
define float @test_libm_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_rint_f32
@@ -309,19 +309,14 @@ define double @test_libm_rint_f64(double %arg) {
ret double %rint
}
-define float @test_rint_f32_strictfp(float %arg) #3 {
+define float @test_rint_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_rint_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z4rintf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call nnan float @_Z4rintf(float %arg) #3
+ %rint = tail call nnan float @_Z4rintf(float %arg) strictfp
ret float %rint
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index 2ffa647d1869a5..24ae9509cca901 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -508,14 +508,14 @@ entry:
ret float %call
}
-define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
+define float @test_rootn_f32__y_1__strictfp(float %x) strictfp {
; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret float [[X]]
;
entry:
- %call = tail call float @_Z5rootnfi(float %x, i32 1) #1
+ %call = tail call float @_Z5rootnfi(float %x, i32 1) strictfp
ret float %call
}
@@ -531,7 +531,7 @@ entry:
ret <2 x float> %call
}
-define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
+define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) strictfp {
; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -539,7 +539,7 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
; CHECK-NEXT: ret <2 x float> [[CALL]]
;
entry:
- %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1
+ %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) strictfp
ret <2 x float> %call
}
@@ -892,7 +892,7 @@ entry:
ret float %call
}
-define float @test_rootn_f32__y_neg2__strictfp(float %x) #1 {
+define float @test_rootn_f32__y_neg2__strictfp(float %x) strictfp {
; CHECK-LABEL: define float @test_rootn_f32__y_neg2__strictfp(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -900,7 +900,7 @@ define float @test_rootn_f32__y_neg2__strictfp(float %x) #1 {
; CHECK-NEXT: ret float [[__ROOTN2RSQRT]]
;
entry:
- %call = tail call float @_Z5rootnfi(float %x, i32 -2) #1
+ %call = tail call float @_Z5rootnfi(float %x, i32 -2) strictfp
ret float %call
}
@@ -912,7 +912,7 @@ define float @test_rootn_f32__y_neg2__noinline(float %x) {
; CHECK-NEXT: ret float [[__ROOTN2RSQRT]]
;
entry:
- %call = tail call float @_Z5rootnfi(float %x, i32 -2) #2
+ %call = tail call float @_Z5rootnfi(float %x, i32 -2) noinline
ret float %call
}
@@ -924,7 +924,7 @@ define float @test_rootn_f32__y_neg2__nobuiltin(float %x) {
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call float @_Z5rootnfi(float %x, i32 -2) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 -2) nobuiltin
ret float %call
}
@@ -952,7 +952,7 @@ entry:
ret <2 x float> %call
}
-define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 {
+define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) strictfp {
; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(
; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -960,7 +960,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 {
; CHECK-NEXT: ret <2 x float> [[CALL]]
;
entry:
- %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>) #1
+ %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>) strictfp
ret <2 x float> %call
}
@@ -1136,11 +1136,11 @@ define float @test_rootn_fast_f32_nobuiltin(float %x, i32 %y) {
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call fast float @_Z5rootnfi(float %x, i32 %y) #0
+ %call = tail call fast float @_Z5rootnfi(float %x, i32 %y) nobuiltin
ret float %call
}
-define float @test_rootn_fast_f32_strictfp(float %x, i32 %y) #1 {
+define float @test_rootn_fast_f32_strictfp(float %x, i32 %y) strictfp {
; CHECK-LABEL: define float @test_rootn_fast_f32_strictfp(
; CHECK-SAME: float [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -1148,7 +1148,7 @@ define float @test_rootn_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call fast float @_Z5rootnfi(float %x, i32 %y) #1
+ %call = tail call fast float @_Z5rootnfi(float %x, i32 %y) strictfp
ret float %call
}
@@ -1430,7 +1430,7 @@ define float @test_rootn_f32__y_0_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 0) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 0) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 0) nobuiltin
ret float %call
}
@@ -1440,7 +1440,7 @@ define float @test_rootn_f32__y_1_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 1) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 1) nobuiltin
ret float %call
}
@@ -1450,7 +1450,7 @@ define float @test_rootn_f32__y_2_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 2) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 2) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 2) nobuiltin
ret float %call
}
@@ -1460,7 +1460,7 @@ define float @test_rootn_f32__y_3_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 3) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 3) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 3) nobuiltin
ret float %call
}
@@ -1470,7 +1470,7 @@ define float @test_rootn_f32__y_neg1_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 -1) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 -1) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 -1) nobuiltin
ret float %call
}
@@ -1480,14 +1480,10 @@ define float @test_rootn_f32__y_neg2_nobuiltin(float %x) {
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 -2) #[[ATTR2]]
; CHECK-NEXT: ret float [[CALL]]
;
- %call = tail call float @_Z5rootnfi(float %x, i32 -2) #0
+ %call = tail call float @_Z5rootnfi(float %x, i32 -2) nobuiltin
ret float %call
}
-attributes #0 = { nobuiltin }
-attributes #1 = { strictfp }
-attributes #2 = { noinline }
-
!0 = !{float 3.0}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll
index 8a4697983bb1e7..0566e20dfbf60f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-round.ll
@@ -210,7 +210,7 @@ define float @test_rint_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5roundf(float %arg) #0
+ %rint = tail call float @_Z5roundf(float %arg) nobuiltin
ret float %rint
}
@@ -220,28 +220,28 @@ define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
; "no-builtins" should be ignored
-define float @test_rint_f32_nobuiltins(float %arg) #1 {
+define float @test_rint_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_rint_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5roundf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5roundf(float %arg) #0
+ %rint = tail call float @_Z5roundf(float %arg) nobuiltin
ret float %rint
}
-define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5roundDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5roundDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
@@ -286,8 +286,8 @@ define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @rintf(float) #2
-declare double @rint(double) #2
+declare float @rintf(float) nounwind memory(none)
+declare double @rint(double) nounwind memory(none)
define float @test_libm_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_rint_f32
@@ -309,19 +309,14 @@ define double @test_libm_rint_f64(double %arg) {
ret double %rint
}
-define float @test_rint_f32_strictfp(float %arg) #3 {
+define float @test_rint_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_rint_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z5roundf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call nnan float @_Z5roundf(float %arg) #3
+ %rint = tail call nnan float @_Z5roundf(float %arg) strictfp
ret float %rint
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
index 5c56276eeb0f1c..89c1095364df10 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -3,49 +3,49 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
-declare float @_Z3sinf(float) #0
-declare float @_Z3cosf(float) #0
-declare <2 x float> @_Z3sinDv2_f(<2 x float>) #0
-declare <2 x float> @_Z3cosDv2_f(<2 x float>) #0
-declare <3 x float> @_Z3sinDv3_f(<3 x float>) #0
-declare <3 x float> @_Z3cosDv3_f(<3 x float>) #0
-declare <4 x float> @_Z3sinDv4_f(<4 x float>) #0
-declare <4 x float> @_Z3cosDv4_f(<4 x float>) #0
-declare <8 x float> @_Z3sinDv8_f(<8 x float>) #0
-declare <8 x float> @_Z3cosDv8_f(<8 x float>) #0
-declare <16 x float> @_Z3sinDv16_f(<16 x float>) #0
-declare <16 x float> @_Z3cosDv16_f(<16 x float>) #0
-
-
-declare half @_Z3sinDh(half) #0
-declare half @_Z3cosDh(half) #0
-declare <2 x half> @_Z3sinDv2_Dh(<2 x half>) #0
-declare <2 x half> @_Z3cosDv2_Dh(<2 x half>) #0
-declare <3 x half> @_Z3sinDv3_Dh(<3 x half>) #0
-declare <3 x half> @_Z3cosDv3_Dh(<3 x half>) #0
-declare <4 x half> @_Z3sinDv4_Dh(<4 x half>) #0
-declare <4 x half> @_Z3cosDv4_Dh(<4 x half>) #0
-declare <8 x half> @_Z3sinDv8_Dh(<8 x half>) #0
-declare <8 x half> @_Z3cosDv8_Dh(<8 x half>) #0
-declare <16 x half> @_Z3sinDv16_Dh(<16 x half>) #0
-declare <16 x half> @_Z3cosDv16_Dh(<16 x half>) #0
-
-
-declare double @_Z3sind(double) #0
-declare double @_Z3cosd(double) #0
-declare <2 x double> @_Z3sinDv2_d(<2 x double>) #0
-declare <2 x double> @_Z3cosDv2_d(<2 x double>) #0
-declare <3 x double> @_Z3sinDv3_d(<3 x double>) #0
-declare <3 x double> @_Z3cosDv3_d(<3 x double>) #0
-declare <4 x double> @_Z3sinDv4_d(<4 x double>) #0
-declare <4 x double> @_Z3cosDv4_d(<4 x double>) #0
-declare <8 x double> @_Z3sinDv8_d(<8 x double>) #0
-declare <8 x double> @_Z3cosDv8_d(<8 x double>) #0
-declare <16 x double> @_Z3sinDv16_d(<16 x double>) #0
-declare <16 x double> @_Z3cosDv16_d(<16 x double>) #0
-
-declare float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) writeonly %ptr) #1
-declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1
+declare float @_Z3sinf(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @_Z3cosf(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @_Z3sinDv2_f(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @_Z3cosDv2_f(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @_Z3sinDv3_f(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @_Z3cosDv3_f(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @_Z3sinDv4_f(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @_Z3cosDv4_f(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x float> @_Z3sinDv8_f(<8 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x float> @_Z3cosDv8_f(<8 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x float> @_Z3sinDv16_f(<16 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x float> @_Z3cosDv16_f(<16 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+
+
+declare half @_Z3sinDh(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @_Z3cosDh(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @_Z3sinDv2_Dh(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @_Z3cosDv2_Dh(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @_Z3sinDv3_Dh(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @_Z3cosDv3_Dh(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @_Z3sinDv4_Dh(<4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @_Z3cosDv4_Dh(<4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x half> @_Z3sinDv8_Dh(<8 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x half> @_Z3cosDv8_Dh(<8 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x half> @_Z3sinDv16_Dh(<16 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x half> @_Z3cosDv16_Dh(<16 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+
+
+declare double @_Z3sind(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @_Z3cosd(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @_Z3sinDv2_d(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @_Z3cosDv2_d(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x double> @_Z3sinDv3_d(<3 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x double> @_Z3cosDv3_d(<3 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x double> @_Z3sinDv4_d(<4 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x double> @_Z3cosDv4_d(<4 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x double> @_Z3sinDv8_d(<8 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <8 x double> @_Z3cosDv8_d(<8 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x double> @_Z3sinDv16_d(<16 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <16 x double> @_Z3cosDv16_d(<16 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+
+declare float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) writeonly %ptr) argmemonly nounwind willreturn
+declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) argmemonly nounwind willreturn
define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
; CHECK-LABEL: define void @sincos_f16_nocontract
@@ -1275,10 +1275,7 @@ entry:
ret float %sin2
}
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { argmemonly nounwind willreturn }
+declare void @llvm.dbg.value(metadata, metadata, metadata) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!llvm.dbg.cu = !{!0}
!llvm.debugify = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
index 5d765b614db362..02287e3ce7b9fb 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
@@ -8,7 +8,7 @@ declare float @_Z3cosf(float noundef)
declare <2 x float> @_Z3sinDv2_f(<2 x float> noundef)
declare <2 x float> @_Z3cosDv2_f(<2 x float> noundef)
-define void @sincos_f32_nobuiltin(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #0 {
+define void @sincos_f32_nobuiltin(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) nobuiltin {
; CHECK: Function Attrs: nobuiltin
; CHECK-LABEL: define void @sincos_f32_nobuiltin
; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -29,7 +29,7 @@ entry:
ret void
}
-define void @sincos_v2f32_nobuiltin(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #0 {
+define void @sincos_v2f32_nobuiltin(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) nobuiltin {
; CHECK: Function Attrs: nobuiltin
; CHECK-LABEL: define void @sincos_v2f32_nobuiltin
; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0]] {
@@ -50,7 +50,7 @@ entry:
ret void
}
-define void @sincos_f32_no_builtins(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #1 {
+define void @sincos_f32_no_builtins(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) "no-builtins" {
; CHECK-LABEL: define void @sincos_f32_no_builtins
; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -70,7 +70,7 @@ entry:
ret void
}
-define void @sincos_v2f32_no_builtins(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #1 {
+define void @sincos_v2f32_no_builtins(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) "no-builtins" {
; CHECK-LABEL: define void @sincos_v2f32_no_builtins
; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
@@ -101,9 +101,9 @@ define void @sincos_f32_nobuiltin_callsite(float noundef %x, ptr addrspace(1) no
; CHECK-NEXT: ret void
;
entry:
- %call = tail call contract float @_Z3sinf(float noundef %x) #0
+ %call = tail call contract float @_Z3sinf(float noundef %x) nobuiltin
store float %call, ptr addrspace(1) %sin_out, align 4
- %call1 = tail call contract float @_Z3cosf(float noundef %x) #0
+ %call1 = tail call contract float @_Z3cosf(float noundef %x) nobuiltin
store float %call1, ptr addrspace(1) %cos_out, align 4
ret void
}
@@ -119,7 +119,7 @@ define void @sincos_f32_nobuiltin_callsite0(float noundef %x, ptr addrspace(1) n
; CHECK-NEXT: ret void
;
entry:
- %call = tail call contract float @_Z3sinf(float noundef %x) #0
+ %call = tail call contract float @_Z3sinf(float noundef %x) nobuiltin
store float %call, ptr addrspace(1) %sin_out, align 4
%call1 = tail call contract float @_Z3cosf(float noundef %x)
store float %call1, ptr addrspace(1) %cos_out, align 4
@@ -139,7 +139,7 @@ define void @sincos_f32_nobuiltin_callsite1(float noundef %x, ptr addrspace(1) n
entry:
%call = tail call contract float @_Z3sinf(float noundef %x)
store float %call, ptr addrspace(1) %sin_out, align 4
- %call1 = tail call contract float @_Z3cosf(float noundef %x) #0
+ %call1 = tail call contract float @_Z3cosf(float noundef %x) nobuiltin
store float %call1, ptr addrspace(1) %cos_out, align 4
ret void
}
@@ -155,16 +155,14 @@ define void @sincos_v2f32_nobuiltin_callsite(<2 x float> noundef %x, ptr addrspa
; CHECK-NEXT: ret void
;
entry:
- %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) #0
+ %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) nobuiltin
store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
- %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) #0
+ %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) nobuiltin
store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
ret void
}
; TODO: Handle single function forms
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
;.
; CHECK: attributes #[[ATTR0]] = { nobuiltin }
; CHECK: attributes #[[ATTR1]] = { "no-builtins" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
index 72f809b3e0607a..e63f31e75cd3cc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll
@@ -270,7 +270,7 @@ define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
; CHECK-NEXT: ret float [[SQRT]]
;
- %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
+ %sqrt = tail call float @_Z4sqrtf(float %arg) nobuiltin, !fpmath !0
ret float %sqrt
}
@@ -280,7 +280,7 @@ define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
; CHECK-NEXT: ret <2 x float> [[SQRT]]
;
- %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %sqrt
}
@@ -290,7 +290,7 @@ define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[SQRT]]
;
- %sqrt = tail call float @_Z4sqrtf(float %arg) #0
+ %sqrt = tail call float @_Z4sqrtf(float %arg) nobuiltin
ret float %sqrt
}
@@ -300,48 +300,48 @@ define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
; CHECK-NEXT: ret <2 x float> [[SQRT]]
;
- %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
+ %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %sqrt
}
; "no-builtins" should be ignored
-define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
+define float @test_sqrt_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
; CHECK-NEXT: ret float [[SQRT]]
;
- %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
+ %sqrt = tail call float @_Z4sqrtf(float %arg) nobuiltin, !fpmath !0
ret float %sqrt
}
-define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
; CHECK-NEXT: ret <2 x float> [[SQRT]]
;
- %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
+ %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) nobuiltin, !fpmath !0
ret <2 x float> %sqrt
}
-define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
+define float @test_sqrt_cr_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
; CHECK-NEXT: ret float [[SQRT]]
;
- %sqrt = tail call float @_Z4sqrtf(float %arg) #0
+ %sqrt = tail call float @_Z4sqrtf(float %arg) nobuiltin
ret float %sqrt
}
-define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
; CHECK-NEXT: ret <2 x float> [[SQRT]]
;
- %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
+ %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %sqrt
}
@@ -406,8 +406,8 @@ define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @sqrtf(float) #2
-declare double @sqrt(double) #2
+declare float @sqrtf(float) nounwind memory(none)
+declare double @sqrt(double) nounwind memory(none)
define float @test_libm_sqrt_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_sqrt_f32
@@ -449,9 +449,5 @@ define double @test_libm_sqrt_f64_fpmath(double %arg) {
ret double %sqrt
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-
!0 = !{float 3.000000e+00}
!1 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
index 27b6adc1a9c28d..8084dd1858b4a7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll
@@ -210,7 +210,7 @@ define float @test_rint_f32_nobuiltin_callsite(float %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5truncf(float %arg) #0
+ %rint = tail call float @_Z5truncf(float %arg) nobuiltin
ret float %rint
}
@@ -220,28 +220,28 @@ define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
; "no-builtins" should be ignored
-define float @test_rint_f32_nobuiltins(float %arg) #1 {
+define float @test_rint_f32_nobuiltins(float %arg) "no-builtins" {
; CHECK-LABEL: define float @test_rint_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call float @_Z5truncf(float %arg) #0
+ %rint = tail call float @_Z5truncf(float %arg) nobuiltin
ret float %rint
}
-define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
+define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) "no-builtins" {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT: ret <2 x float> [[RINT]]
;
- %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
+ %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) nobuiltin
ret <2 x float> %rint
}
@@ -286,8 +286,8 @@ define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
}
; Test the libm name, not a recognized opencl builtin.
-declare float @rintf(float) #2
-declare double @rint(double) #2
+declare float @rintf(float) nounwind memory(none)
+declare double @rint(double) nounwind memory(none)
define float @test_libm_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_rint_f32
@@ -309,19 +309,14 @@ define double @test_libm_rint_f64(double %arg) {
ret double %rint
}
-define float @test_rint_f32_strictfp(float %arg) #3 {
+define float @test_rint_f32_strictfp(float %arg) strictfp {
; CHECK-LABEL: define float @test_rint_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[RINT:%.*]] = tail call nnan float @_Z5truncf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT: ret float [[RINT]]
;
- %rint = tail call nnan float @_Z5truncf(float %arg) #3
+ %rint = tail call nnan float @_Z5truncf(float %arg) strictfp
ret float %rint
}
-attributes #0 = { nobuiltin }
-attributes #1 = { "no-builtins" }
-attributes #2 = { nounwind memory(none) }
-attributes #3 = { strictfp }
-
!0 = !{i32 1234}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll
index cee80e3dc46df7..6263105d5e1725 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-unroll-threshold.ll
@@ -33,7 +33,7 @@ do.end: ; preds = %do.body
ret void
}
-define void @unroll_full() #0 {
+define void @unroll_full() "amdgpu-unroll-threshold"="1000" {
entry:
br label %do.body
@@ -48,5 +48,3 @@ do.body: ; preds = %entry
do.end: ; preds = %do.body
ret void
}
-
-attributes #0 = { "amdgpu-unroll-threshold"="1000" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index d6841d40f2313e..880208f5d80789 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -73,7 +73,7 @@
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !2
; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !2
-define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -94,7 +94,7 @@ entry:
; OPT-LABEL: @high_alignment(
; OPT: getelementptr inbounds [256 x [8 x i32]], ptr addrspace(3) @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
-define amdgpu_kernel void @high_alignment(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @high_alignment(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [8 x i32], align 16, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -118,7 +118,7 @@ entry:
; OPT: alloca [5 x i32]
; SI-NOT: ds_write
-define amdgpu_kernel void @no_replace_inbounds_gep(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @no_replace_inbounds_gep(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -152,7 +152,7 @@ entry:
; SI-NOT: v_movrel
%struct.point = type { i32, i32 }
-define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%a = alloca %struct.point, addrspace(5)
%b = alloca %struct.point, addrspace(5)
@@ -177,7 +177,7 @@ entry:
; R600-NOT: MOVA_INT
; SI-NOT: v_movrel
-define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%prv_array_const = alloca [2 x i32], addrspace(5)
%prv_array = alloca [2 x i32], addrspace(5)
@@ -218,7 +218,7 @@ for.end:
; SI-PROMOTE-VECT: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 4
; SI-PROMOTE-VECT: s_lshr_b32 [[SREG:s[0-9]+]], 0x10000, [[SCALED_IDX]]
; SI-PROMOTE-VECT: s_and_b32 s{{[0-9]+}}, [[SREG]], 1
-define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [2 x i16], addrspace(5)
%1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1
@@ -240,7 +240,7 @@ entry:
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 ; encoding: [0x00,0x00,0x60,0xe0
; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:1 ; encoding: [0x01,0x00,0x60,0xe0
-define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [2 x i8], addrspace(5)
%1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1
@@ -259,7 +259,7 @@ entry:
;
; A total of 5 bytes should be allocated and used.
; SI: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 ;
-define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [3 x i8], align 1, addrspace(5)
%1 = alloca [2 x i8], align 1, addrspace(5)
@@ -281,7 +281,7 @@ entry:
ret void
}
-define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i8]], addrspace(5)
%gep1 = getelementptr [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -294,7 +294,7 @@ entry:
ret void
}
-define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i32]], addrspace(5)
%gep1 = getelementptr [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -306,7 +306,7 @@ entry:
ret void
}
-define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i64]], addrspace(5)
%gep1 = getelementptr [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -320,7 +320,7 @@ entry:
%struct.pair32 = type { i32, i32 }
-define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
%gep0 = getelementptr [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1
@@ -333,7 +333,7 @@ entry:
ret void
}
-define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x %struct.pair32], addrspace(5)
%gep0 = getelementptr [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -367,7 +367,7 @@ entry:
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5,
; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offen ;
-define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], addrspace(5)
%tmp0 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
store i32 5, ptr addrspace(5) %tmp0
@@ -383,7 +383,7 @@ define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; OPT-LABEL: @pointer_typed_alloca(
; OPT: getelementptr inbounds [256 x ptr addrspace(1)], ptr addrspace(3) @pointer_typed_alloca.A.addr, i32 0, i32 %{{[0-9]+}}
; OPT: load ptr addrspace(1), ptr addrspace(3) %{{[0-9]+}}, align 4
-define amdgpu_kernel void @pointer_typed_alloca(ptr addrspace(1) %A) #1 {
+define amdgpu_kernel void @pointer_typed_alloca(ptr addrspace(1) %A) nounwind "amdgpu-flat-work-group-size"="1,256" {
entry:
%A.addr = alloca ptr addrspace(1), align 4, addrspace(5)
store ptr addrspace(1) %A, ptr addrspace(5) %A.addr, align 4
@@ -526,9 +526,6 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
-
!llvm.module.flags = !{!99}
!99 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
index cc116dfe807ecd..9b710c156abbbc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
@@ -18,7 +18,7 @@
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define amdgpu_kernel void @ngroups_x (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.ngroups.x() #0
+ %0 = call i32 @llvm.r600.read.ngroups.x() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -33,7 +33,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define amdgpu_kernel void @ngroups_y (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.ngroups.y() #0
+ %0 = call i32 @llvm.r600.read.ngroups.y() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -48,7 +48,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define amdgpu_kernel void @ngroups_z (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.ngroups.z() #0
+ %0 = call i32 @llvm.r600.read.ngroups.z() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -63,7 +63,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define amdgpu_kernel void @global_size_x (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.global.size.x() #0
+ %0 = call i32 @llvm.r600.read.global.size.x() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -78,7 +78,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define amdgpu_kernel void @global_size_y (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.global.size.y() #0
+ %0 = call i32 @llvm.r600.read.global.size.y() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -93,7 +93,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define amdgpu_kernel void @global_size_z (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.global.size.z() #0
+ %0 = call i32 @llvm.r600.read.global.size.z() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -108,7 +108,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define amdgpu_kernel void @local_size_x (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.x() #0
+ %0 = call i32 @llvm.r600.read.local.size.x() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -123,7 +123,7 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define amdgpu_kernel void @local_size_y (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.y() #0
+ %0 = call i32 @llvm.r600.read.local.size.y() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -138,21 +138,19 @@ entry:
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define amdgpu_kernel void @local_size_z (ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.z() #0
+ %0 = call i32 @llvm.r600.read.local.size.z() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.r600.read.ngroups.x() #0
-declare i32 @llvm.r600.read.ngroups.y() #0
-declare i32 @llvm.r600.read.ngroups.z() #0
+declare i32 @llvm.r600.read.ngroups.x() readnone
+declare i32 @llvm.r600.read.ngroups.y() readnone
+declare i32 @llvm.r600.read.ngroups.z() readnone
-declare i32 @llvm.r600.read.global.size.x() #0
-declare i32 @llvm.r600.read.global.size.y() #0
-declare i32 @llvm.r600.read.global.size.z() #0
+declare i32 @llvm.r600.read.global.size.x() readnone
+declare i32 @llvm.r600.read.global.size.y() readnone
+declare i32 @llvm.r600.read.global.size.z() readnone
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
-attributes #0 = { readnone }
+declare i32 @llvm.r600.read.local.size.x() readnone
+declare i32 @llvm.r600.read.local.size.y() readnone
+declare i32 @llvm.r600.read.local.size.z() readnone
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index b7b2cb22c1b626..90b527926fe802 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -2,17 +2,17 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
-declare amdgpu_gfx float @extern_func(float) #0
-declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
+declare amdgpu_gfx float @extern_func(float) nounwind
+declare amdgpu_gfx float @extern_func_many_args(<64 x float>) nounwind
@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4
-define amdgpu_gfx float @no_stack(float %arg0) #0 {
+define amdgpu_gfx float @no_stack(float %arg0) nounwind {
%add = fadd float %arg0, 1.0
ret float %add
}
-define amdgpu_gfx float @simple_stack(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -20,7 +20,7 @@ define amdgpu_gfx float @simple_stack(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
+define amdgpu_gfx float @multiple_stack(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -32,7 +32,7 @@ define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
ret float %add2
}
-define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
+define amdgpu_gfx float @dynamic_stack(float %arg0) nounwind {
bb0:
%cmp = fcmp ogt float %arg0, 0.0
br i1 %cmp, label %bb1, label %bb2
@@ -49,7 +49,7 @@ bb2:
ret float %res
}
-define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
+define amdgpu_gfx float @dynamic_stack_loop(float %arg0) nounwind {
bb0:
br label %bb1
@@ -67,12 +67,12 @@ bb2:
ret float %add
}
-define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_call(float %arg0) nounwind {
%res = call amdgpu_gfx float @simple_stack(float %arg0)
ret float %res
}
-define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -81,12 +81,12 @@ define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_extern_call(float %arg0) nounwind {
%res = call amdgpu_gfx float @extern_func(float %arg0)
ret float %res
}
-define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_extern_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -95,18 +95,18 @@ define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
+define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) nounwind {
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
ret float %res
}
-define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_indirect_call(float %arg0) nounwind {
%fptr = load ptr, ptr addrspace(4) @funcptr
call amdgpu_gfx void %fptr()
ret float %arg0
}
-define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -116,7 +116,7 @@ define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_recurse(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -127,19 +127,17 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
@lds = internal addrspace(3) global [64 x float] undef
-define amdgpu_gfx float @simple_lds(float %arg0) #0 {
+define amdgpu_gfx float @simple_lds(float %arg0) nounwind {
%val = load float, ptr addrspace(3) @lds
ret float %val
}
-define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
+define amdgpu_gfx float @simple_lds_recurse(float %arg0) nounwind {
%val = load float, ptr addrspace(3) @lds
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
ret float %res
}
-attributes #0 = { nounwind }
-
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 99a7ae37e0e78d..0cd4a5623f127b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -7,7 +7,7 @@
; CHECK: .type kernel_32_agprs
; CHECK: NumAgprs: 32
-define amdgpu_kernel void @kernel_32_agprs() #0 {
+define amdgpu_kernel void @kernel_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -16,7 +16,7 @@ bb:
; CHECK: .type kernel_0_agprs
; CHECK: NumAgprs: 0
-define amdgpu_kernel void @kernel_0_agprs() #0 {
+define amdgpu_kernel void @kernel_0_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
@@ -24,7 +24,7 @@ bb:
; CHECK: .type kernel_40_vgprs
; CHECK: NumAgprs: 16
-define amdgpu_kernel void @kernel_40_vgprs() #0 {
+define amdgpu_kernel void @kernel_40_vgprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
@@ -33,7 +33,7 @@ bb:
; CHECK: .type kernel_max_gprs
; CHECK: NumAgprs: 256
-define amdgpu_kernel void @kernel_max_gprs() #0 {
+define amdgpu_kernel void @kernel_max_gprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v255}" ()
call void asm sideeffect "", "~{a255}" ()
@@ -42,7 +42,7 @@ bb:
; CHECK: .type func_32_agprs
; CHECK: NumAgprs: 32
-define void @func_32_agprs() #0 {
+define void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -51,9 +51,9 @@ bb:
; CHECK: .type kernel_call_func_32_agprs
; CHECK: NumAgprs: 32
-define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
+define amdgpu_kernel void @kernel_call_func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
- call void @func_32_agprs() #0
+ call void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512"
ret void
}
@@ -61,7 +61,7 @@ declare void @undef_func()
; CHECK: .type kernel_call_undef_func
; CHECK: NumAgprs: 32
-define amdgpu_kernel void @kernel_call_undef_func() #0 {
+define amdgpu_kernel void @kernel_call_undef_func() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void @undef_func()
ret void
@@ -74,5 +74,3 @@ bb:
; GFX908: agpr_count: 0x20
; GFX908: vgpr_count: 0x20
-
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll
index b86b428680059e..499816915b67b7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-denormal.ll
@@ -6,7 +6,7 @@
; SI-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0x2c0000{{$}}
; VI-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0x2c02c0{{$}}
; GFX9-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0x2c0000{{$}}
-define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
+define amdgpu_cs half @cs_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -15,7 +15,7 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0x2c0000{{$}}
; VI-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0x2c02c0{{$}}
; GFX9-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0x2c0000{{$}}
-define amdgpu_es half @es_amdpal(half %arg0) #0 {
+define amdgpu_es half @es_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -24,7 +24,7 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0x2c0000{{$}}
; VI-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0x2c02c0{{$}}
; GFX9-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0x2c0000{{$}}
-define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
+define amdgpu_gs half @gs_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -33,7 +33,7 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0x2c0000{{$}}
; VI-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0x2c02c0{{$}}
; GFX9-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0x2c0000{{$}}
-define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
+define amdgpu_hs half @hs_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -42,7 +42,7 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0x2c0000{{$}}
; VI-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0x2c02c0{{$}}
; GFX9-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0x2c0000{{$}}
-define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
+define amdgpu_ls half @ls_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -52,7 +52,7 @@ define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0x2c0000{{$}}
; VI-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0x2c02c0{{$}}
; GFX9-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0x2c0000{{$}}
-define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
+define amdgpu_ps half @ps_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -61,13 +61,11 @@ define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0x2c0000{{$}}
; VI-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0x2c02c0{{$}}
; GFX9-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0x2c0000{{$}}
-define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
+define amdgpu_vs half @vs_amdpal(half %arg0) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%add = fadd half %arg0, 1.0
ret half %add
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-
; amdgpu.pal.metadata.msgpack represents this:
;
; .amdgpu_pal_metadata
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll
index b1db7aafacab0b..1e86bd2015da30 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-dx10-clamp.ll
@@ -6,7 +6,7 @@
; SI-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0xf0000{{$}}
; VI-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0xf02c0{{$}}
; GFX9-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0xf0000{{$}}
-define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
+define amdgpu_cs half @cs_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -15,7 +15,7 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xf0000{{$}}
; VI-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xf02c0{{$}}
; GFX9-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xf0000{{$}}
-define amdgpu_es half @es_amdpal(half %arg0) #0 {
+define amdgpu_es half @es_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -24,7 +24,7 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xf0000{{$}}
; VI-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xf02c0{{$}}
; GFX9-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xf0000{{$}}
-define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
+define amdgpu_gs half @gs_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -33,7 +33,7 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0xf0000{{$}}
; VI-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0xf02c0{{$}}
; GFX9-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0xf0000{{$}}
-define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
+define amdgpu_hs half @hs_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -42,7 +42,7 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
; SI-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xf0000{{$}}
; VI-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xf02c0{{$}}
; GFX9-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xf0000{{$}}
-define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
+define amdgpu_ls half @ls_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -52,7 +52,7 @@ define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0xf0000{{$}}
; VI-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0xf02c0{{$}}
; GFX9-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0xf0000{{$}}
-define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
+define amdgpu_ps half @ps_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -61,13 +61,11 @@ define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
; SI-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0xf0000{{$}}
; VI-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0xf02c0{{$}}
; GFX9-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0xf0000{{$}}
-define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
+define amdgpu_vs half @vs_amdpal(half %arg0) "amdgpu-dx10-clamp"="false" {
%add = fadd half %arg0, 1.0
ret half %add
}
-attributes #0 = { "amdgpu-dx10-clamp"="false" }
-
; amdgpu.pal.metadata.msgpack represents this:
;
; .amdgpu_pal_metadata
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
index 95d533544c3082..112b16d8823fa0 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll
@@ -8,7 +8,7 @@
; VI-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf02c0{{$}}
; GFX9-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf0000{{$}}
; GFX12-DAG: 0x2e12 (COMPUTE_PGM_RSRC1): 0x600f0000{{$}}
-define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
+define amdgpu_cs half @cs_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -18,7 +18,7 @@ define amdgpu_cs half @cs_amdpal(half %arg0) #0 {
; VI-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xaf02c0{{$}}
; GFX9-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xaf0000{{$}}
; GFX12-DAG: 0x2cca (SPI_SHADER_PGM_RSRC1_ES): 0xf0000{{$}}
-define amdgpu_es half @es_amdpal(half %arg0) #0 {
+define amdgpu_es half @es_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -28,7 +28,7 @@ define amdgpu_es half @es_amdpal(half %arg0) #0 {
; VI-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xaf02c0{{$}}
; GFX9-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xaf0000{{$}}
; GFX12-DAG: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS): 0xa0f0000{{$}}
-define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
+define amdgpu_gs half @gs_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -38,7 +38,7 @@ define amdgpu_gs half @gs_amdpal(half %arg0) #0 {
; VI-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0xaf02c0{{$}}
; GFX9-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0xaf0000{{$}}
; GFX12-DAG: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS): 0x50f0000{{$}}
-define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
+define amdgpu_hs half @hs_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -48,7 +48,7 @@ define amdgpu_hs half @hs_amdpal(half %arg0) #0 {
; VI-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xaf02c0{{$}}
; GFX9-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xaf0000{{$}}
; GFX12-DAG: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS): 0xf0000{{$}}
-define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
+define amdgpu_ls half @ls_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -59,7 +59,7 @@ define amdgpu_ls half @ls_amdpal(half %arg0) #0 {
; VI-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0xaf02c0{{$}}
; GFX9-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0xaf0000{{$}}
; GFX12-DAG: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0x20f0000{{$}}
-define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
+define amdgpu_ps half @ps_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
@@ -69,13 +69,11 @@ define amdgpu_ps half @ps_amdpal(half %arg0) #0 {
; VI-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0xaf02c0{{$}}
; GFX9-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0xaf0000{{$}}
; GFX12-DAG: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS): 0x80f0000{{$}}
-define amdgpu_vs half @vs_amdpal(half %arg0) #0 {
+define amdgpu_vs half @vs_amdpal(half %arg0) "amdgpu-ieee"="true" {
%add = fadd half %arg0, 1.0
ret half %add
}
-attributes #0 = { "amdgpu-ieee"="true" }
-
; amdgpu.pal.metadata.msgpack represents this:
;
; .amdgpu_pal_metadata
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
index d6322e2b4d3e39..b6900b078635bd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
@@ -9,7 +9,7 @@
; GCN: .amdgpu_pal_metadata
; GCN: 0xa1b3 (SPI_PS_INPUT_ENA): 0x2
; GCN: 0xa1b4 (SPI_PS_INPUT_ADDR): 0x2
-define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) #6 {
+define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) nounwind "InitialPSInputAddr"="2" {
%inst23 = extractelement <2 x float> %pos, i32 0
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
@@ -20,8 +20,6 @@ define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inre
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-attributes #6 = { nounwind "InitialPSInputAddr"="2" }
-
; Force MsgPack format metadata
!amdgpu.pal.metadata.msgpack = !{!0}
!0 = !{!""}
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
index 13d2050c491fb3..1947e3cd71da12 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
@@ -20,7 +20,7 @@
; GCN-NEXT: 0xa1b4 (SPI_PS_INPUT_ADDR): 0x2
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
-define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) #6 {
+define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) nounwind "InitialPSInputAddr"="2" {
%inst23 = extractelement <2 x float> %pos, i32 0
%inst24 = extractelement <2 x float> %pos, i32 1
%inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0)
@@ -30,5 +30,3 @@ define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inre
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-
-attributes #6 = { nounwind "InitialPSInputAddr"="2" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 0ec5aeb24b4235..c4fa0862682d58 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -41,7 +41,7 @@ entry:
; PAL: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:
; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) #0 {
+define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, ptr addrspace(5) %out) nounwind "amdgpu-git-ptr-high"="0x1234" {
entry:
%v = alloca [2 x i32], addrspace(5)
store <2 x i32> %in, ptr addrspace(5) %v
@@ -63,7 +63,7 @@ entry:
; VI: s_load_dwordx4 s[[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s[[[GITPTR]]:{{[0-9]+\]}}, 0x10
; PAL: buffer_store{{.*}}, s[[[SCRATCHDESC]]:
-define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) #0 {
+define amdgpu_cs void @scratch2_cs(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32> %coord, <2 x i32> %in, i32 %extra, i32 %idx) nounwind "amdgpu-git-ptr-high"="0x1234" {
entry:
%v = alloca [3 x i32], addrspace(5)
%v1 = getelementptr [3 x i32], ptr addrspace(5) %v, i32 0, i32 1
@@ -76,8 +76,6 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
-
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/and-gcn.ll b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
index 095c25d8436b53..da1a9f049fdb1b 100644
--- a/llvm/test/CodeGen/AMDGPU/and-gcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
@@ -5,7 +5,7 @@
; SI: s_and_b64
define amdgpu_kernel void @v_and_i64_br(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
entry:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %if, label %endif
@@ -21,6 +21,4 @@ endif:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index d6137597293f65..f079827d02f8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -94,7 +94,7 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_1(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_and_i32_vgpr_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -110,7 +110,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(ptr addrspace(1) %out, ptr addrsp
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_sgpr_vgpr(ptr addrspace(1) %out, i32 %a, ptr addrspace(1) %bptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%b = load i32, ptr addrspace(1) %gep.b
@@ -124,7 +124,7 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(ptr addrspace(1) %out, i32 %a, pt
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
define amdgpu_kernel void @v_and_i32_vgpr_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i32 %b) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load i32, ptr addrspace(1) %gep.a
@@ -136,7 +136,7 @@ define amdgpu_kernel void @v_and_i32_vgpr_sgpr(ptr addrspace(1) %out, ptr addrsp
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define amdgpu_kernel void @v_and_constant_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 1234567
@@ -147,7 +147,7 @@ define amdgpu_kernel void @v_and_constant_i32(ptr addrspace(1) %out, ptr addrspa
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 64
@@ -158,7 +158,7 @@ define amdgpu_kernel void @v_and_inline_imm_64_i32(ptr addrspace(1) %out, ptr ad
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, -16
@@ -249,7 +249,7 @@ define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(ptr addrspace(1) %out,
; SI: v_and_b32
; SI: v_and_b32
define amdgpu_kernel void @v_and_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%a = load i64, ptr addrspace(1) %gep.a, align 8
%gep.b = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
@@ -264,7 +264,7 @@ define amdgpu_kernel void @v_and_i64(ptr addrspace(1) %out, ptr addrspace(1) %ap
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1231231234567
@@ -318,7 +318,7 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(ptr addrspace(1) %out,
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_i64_32_bit_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1234567
@@ -333,7 +333,7 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(ptr addrspace(1) %out, ptr
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @v_and_inline_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 64
@@ -349,7 +349,7 @@ define amdgpu_kernel void @v_and_inline_imm_i64(ptr addrspace(1) %out, ptr addrs
; SI-NOT: and
; SI: buffer_store_dwordx2 v[[[VAL_LO]]:[[VAL_HI]]]
define amdgpu_kernel void @v_and_inline_neg_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, -8
@@ -568,4 +568,3 @@ define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(ptr addrspace(1
store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index a189ba9b103421..f5195d22b132ed 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -54,15 +54,15 @@ define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: {{^}}bitset_verifier_error:
; SI-NOT: %bb.1:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
-define void @bitset_verifier_error() local_unnamed_addr #0 {
+define void @bitset_verifier_error() local_unnamed_addr nounwind readnone speculatable willreturn {
bb:
- %i = call float @llvm.fabs.f32(float undef) #0
+ %i = call float @llvm.fabs.f32(float undef) nounwind readnone speculatable willreturn
%i1 = bitcast float %i to i32
store i32 %i1, ptr addrspace(1) @gv
br label %bb2
bb2:
- %i3 = call float @llvm.fabs.f32(float undef) #0
+ %i3 = call float @llvm.fabs.f32(float undef) nounwind readnone speculatable willreturn
%i4 = fcmp fast ult float %i3, 0x3FEFF7CEE0000000
br i1 %i4, label %bb5, label %bb6
@@ -73,6 +73,4 @@ bb6:
unreachable
}
-declare float @llvm.fabs.f32(float) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 3d4ae84d9c698e..9459990b7809ad 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -4,21 +4,21 @@
; TODO: The test contains UB which is refined by the Attributor and should be removed.
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
+declare i64 @llvm.amdgcn.dispatch.id() nounwind readnone speculatable
-define void @use_workitem_id_x() #1 {
+define void @use_workitem_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -36,7 +36,7 @@ define void @use_workitem_id_x() #1 {
ret void
}
-define void @use_workitem_id_y() #1 {
+define void @use_workitem_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -54,7 +54,7 @@ define void @use_workitem_id_y() #1 {
ret void
}
-define void @use_workitem_id_z() #1 {
+define void @use_workitem_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -72,7 +72,7 @@ define void @use_workitem_id_z() #1 {
ret void
}
-define void @use_workgroup_id_x() #1 {
+define void @use_workgroup_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -90,7 +90,7 @@ define void @use_workgroup_id_x() #1 {
ret void
}
-define void @use_workgroup_id_y() #1 {
+define void @use_workgroup_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -108,7 +108,7 @@ define void @use_workgroup_id_y() #1 {
ret void
}
-define void @use_workgroup_id_z() #1 {
+define void @use_workgroup_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -126,7 +126,7 @@ define void @use_workgroup_id_z() #1 {
ret void
}
-define void @use_dispatch_ptr() #1 {
+define void @use_dispatch_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
@@ -144,7 +144,7 @@ define void @use_dispatch_ptr() #1 {
ret void
}
-define void @use_queue_ptr() #1 {
+define void @use_queue_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
@@ -162,7 +162,7 @@ define void @use_queue_ptr() #1 {
ret void
}
-define void @use_dispatch_id() #1 {
+define void @use_dispatch_id() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
@@ -180,7 +180,7 @@ define void @use_dispatch_id() #1 {
ret void
}
-define void @use_workgroup_id_y_workgroup_id_z() #1 {
+define void @use_workgroup_id_y_workgroup_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -204,7 +204,7 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 {
ret void
}
-define void @func_indirect_use_workitem_id_x() #1 {
+define void @func_indirect_use_workitem_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workitem_id_x()
@@ -219,7 +219,7 @@ define void @func_indirect_use_workitem_id_x() #1 {
ret void
}
-define void @kernel_indirect_use_workitem_id_x() #1 {
+define void @kernel_indirect_use_workitem_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workitem_id_x()
@@ -234,7 +234,7 @@ define void @kernel_indirect_use_workitem_id_x() #1 {
ret void
}
-define void @func_indirect_use_workitem_id_y() #1 {
+define void @func_indirect_use_workitem_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workitem_id_y()
@@ -249,7 +249,7 @@ define void @func_indirect_use_workitem_id_y() #1 {
ret void
}
-define void @func_indirect_use_workitem_id_z() #1 {
+define void @func_indirect_use_workitem_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workitem_id_z()
@@ -264,7 +264,7 @@ define void @func_indirect_use_workitem_id_z() #1 {
ret void
}
-define void @func_indirect_use_workgroup_id_x() #1 {
+define void @func_indirect_use_workgroup_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workgroup_id_x()
@@ -279,7 +279,7 @@ define void @func_indirect_use_workgroup_id_x() #1 {
ret void
}
-define void @kernel_indirect_use_workgroup_id_x() #1 {
+define void @kernel_indirect_use_workgroup_id_x() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workgroup_id_x()
@@ -294,7 +294,7 @@ define void @kernel_indirect_use_workgroup_id_x() #1 {
ret void
}
-define void @func_indirect_use_workgroup_id_y() #1 {
+define void @func_indirect_use_workgroup_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workgroup_id_y()
@@ -309,7 +309,7 @@ define void @func_indirect_use_workgroup_id_y() #1 {
ret void
}
-define void @func_indirect_use_workgroup_id_z() #1 {
+define void @func_indirect_use_workgroup_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_workgroup_id_z()
@@ -324,7 +324,7 @@ define void @func_indirect_use_workgroup_id_z() #1 {
ret void
}
-define void @func_indirect_indirect_use_workgroup_id_y() #1 {
+define void @func_indirect_indirect_use_workgroup_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y()
@@ -339,7 +339,7 @@ define void @func_indirect_indirect_use_workgroup_id_y() #1 {
ret void
}
-define void @indirect_x2_use_workgroup_id_y() #1 {
+define void @indirect_x2_use_workgroup_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y()
@@ -354,7 +354,7 @@ define void @indirect_x2_use_workgroup_id_y() #1 {
ret void
}
-define void @func_indirect_use_dispatch_ptr() #1 {
+define void @func_indirect_use_dispatch_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_dispatch_ptr()
@@ -369,7 +369,7 @@ define void @func_indirect_use_dispatch_ptr() #1 {
ret void
}
-define void @func_indirect_use_queue_ptr() #1 {
+define void @func_indirect_use_queue_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_queue_ptr()
@@ -384,7 +384,7 @@ define void @func_indirect_use_queue_ptr() #1 {
ret void
}
-define void @func_indirect_use_dispatch_id() #1 {
+define void @func_indirect_use_dispatch_id() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_dispatch_id()
@@ -399,7 +399,7 @@ define void @func_indirect_use_dispatch_id() #1 {
ret void
}
-define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
+define void @func_indirect_use_workgroup_id_y_workgroup_id_z() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
@@ -414,7 +414,7 @@ define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
ret void
}
-define void @recursive_use_workitem_id_y() #1 {
+define void @recursive_use_workitem_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -435,7 +435,7 @@ define void @recursive_use_workitem_id_y() #1 {
ret void
}
-define void @call_recursive_use_workitem_id_y() #1 {
+define void @call_recursive_use_workitem_id_y() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @recursive_use_workitem_id_y()
@@ -450,7 +450,7 @@ define void @call_recursive_use_workitem_id_y() #1 {
ret void
}
-define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
+define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
@@ -469,7 +469,7 @@ define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
}
-define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
+define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) nounwind "target-cpu"="gfx900" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
@@ -487,7 +487,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
ret void
}
-define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %ptr) #2 {
+define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %ptr) nounwind "target-cpu"="gfx900" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR2]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
@@ -508,7 +508,7 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %pt
ret void
}
-define void @indirect_use_group_to_flat_addrspacecast() #1 {
+define void @indirect_use_group_to_flat_addrspacecast() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
@@ -523,7 +523,7 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 {
ret void
}
-define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
+define void @indirect_use_group_to_flat_addrspacecast_gfx9() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
@@ -538,7 +538,7 @@ define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
ret void
}
-define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
+define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
@@ -553,7 +553,7 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
ret void
}
-define void @use_kernarg_segment_ptr() #1 {
+define void @use_kernarg_segment_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -570,7 +570,7 @@ define void @use_kernarg_segment_ptr() #1 {
store volatile ptr addrspace(4) %kernarg.segment.ptr, ptr addrspace(1) undef
ret void
}
-define void @func_indirect_use_kernarg_segment_ptr() #1 {
+define void @func_indirect_use_kernarg_segment_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_kernarg_segment_ptr()
@@ -585,7 +585,7 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
ret void
}
-define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
+define amdgpu_kernel void @kern_use_implicitarg_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -603,7 +603,7 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
ret void
}
-define void @use_implicitarg_ptr() #1 {
+define void @use_implicitarg_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -621,7 +621,7 @@ define void @use_implicitarg_ptr() #1 {
ret void
}
-define void @func_indirect_use_implicitarg_ptr() #1 {
+define void @func_indirect_use_implicitarg_ptr() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: call void @use_implicitarg_ptr()
@@ -636,10 +636,10 @@ define void @func_indirect_use_implicitarg_ptr() #1 {
ret void
}
-declare void @external.func() #3
+declare void @external.func() nounwind
; This function gets deleted.
-define internal void @defined.func() #3 {
+define internal void @defined.func() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@defined.func
; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] {
; AKF_HSA-NEXT: ret void
@@ -651,7 +651,7 @@ define internal void @defined.func() #3 {
ret void
}
-define void @func_call_external() #3 {
+define void @func_call_external() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external
; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void @external.func()
@@ -666,7 +666,7 @@ define void @func_call_external() #3 {
ret void
}
-define void @func_call_defined() #3 {
+define void @func_call_defined() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined
; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void @defined.func()
@@ -680,7 +680,7 @@ define void @func_call_defined() #3 {
call void @defined.func()
ret void
}
-define void @func_call_asm() #3 {
+define void @func_call_asm() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm
; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR3]]
@@ -691,11 +691,11 @@ define void @func_call_asm() #3 {
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR28:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
- call void asm sideeffect "", ""() #3
+ call void asm sideeffect "", ""() nounwind
ret void
}
-define amdgpu_kernel void @kern_call_external() #3 {
+define amdgpu_kernel void @kern_call_external() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external
; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
; AKF_HSA-NEXT: call void @external.func()
@@ -710,7 +710,7 @@ define amdgpu_kernel void @kern_call_external() #3 {
ret void
}
-define amdgpu_kernel void @func_kern_defined() #3 {
+define amdgpu_kernel void @func_kern_defined() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined
; AKF_HSA-SAME: () #[[ATTR4]] {
; AKF_HSA-NEXT: call void @defined.func()
@@ -725,7 +725,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
ret void
}
-define i32 @use_dispatch_ptr_ret_type() #1 {
+define i32 @use_dispatch_ptr_ret_type() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
@@ -743,7 +743,7 @@ define i32 @use_dispatch_ptr_ret_type() #1 {
ret i32 0
}
-define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
+define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() nounwind "target-cpu"="fiji" {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float @use_dispatch_ptr_ret_type()
@@ -761,7 +761,7 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
ret float %fadd
}
-define float @func_indirect_call(ptr %fptr) #3 {
+define float @func_indirect_call(ptr %fptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call
; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
@@ -779,8 +779,8 @@ define float @func_indirect_call(ptr %fptr) #3 {
ret float %fadd
}
-declare float @extern() #3
-define float @func_extern_call() #3 {
+declare float @extern() nounwind
+define float @func_extern_call() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float @extern()
@@ -798,7 +798,7 @@ define float @func_extern_call() #3 {
ret float %fadd
}
-define float @func_null_call(ptr %fptr) #3 {
+define float @func_null_call(ptr %fptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call
; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float null()
@@ -816,10 +816,10 @@ define float @func_null_call(ptr %fptr) #3 {
ret float %fadd
}
-declare float @llvm.amdgcn.rcp.f32(float) #0
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone speculatable
; Calls some other recognized intrinsic
-define float @func_other_intrinsic_call(float %arg) #3 {
+define float @func_other_intrinsic_call(float %arg) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
@@ -838,7 +838,7 @@ define float @func_other_intrinsic_call(float %arg) #3 {
}
; Hostcall needs to be enabled for sanitizers
-define amdgpu_kernel void @kern_sanitize_address() #4 {
+define amdgpu_kernel void @kern_sanitize_address() nounwind sanitize_address {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
; AKF_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
@@ -854,7 +854,7 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
}
; Hostcall needs to be enabled for sanitizers
-define void @func_sanitize_address() #4 {
+define void @func_sanitize_address() nounwind sanitize_address {
; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5]] {
; AKF_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4
@@ -870,7 +870,7 @@ define void @func_sanitize_address() #4 {
}
; Hostcall needs to be enabled for sanitizers
-define void @func_indirect_sanitize_address() #3 {
+define void @func_indirect_sanitize_address() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void @func_sanitize_address()
@@ -886,7 +886,7 @@ define void @func_indirect_sanitize_address() #3 {
}
; Hostcall needs to be enabled for sanitizers
-define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
+define amdgpu_kernel void @kern_indirect_sanitize_address() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR4]] {
; AKF_HSA-NEXT: call void @func_sanitize_address()
@@ -903,9 +903,9 @@ define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; Marked with amdgpu-no-implicitarg-ptr, and
; sanitize_address. sanitize_address wins and requires the pointer.
-declare void @extern_func_sanitize_address() #5
+declare void @extern_func_sanitize_address() nounwind sanitize_address "amdgpu-no-implicitarg-ptr"
-define amdgpu_kernel void @kern_decl_sanitize_address() #3 {
+define amdgpu_kernel void @kern_decl_sanitize_address() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address
; AKF_HSA-SAME: () #[[ATTR4]] {
; AKF_HSA-NEXT: call void @extern_func_sanitize_address()
@@ -920,9 +920,9 @@ define amdgpu_kernel void @kern_decl_sanitize_address() #3 {
ret void
}
-declare void @enqueue_block_decl() #6
+declare void @enqueue_block_decl() "enqueued-block"
-define internal void @enqueue_block_def() #6 {
+define internal void @enqueue_block_def() "enqueued-block" {
; AKF_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
; AKF_HSA-SAME: () #[[ATTR7:[0-9]+]] {
; AKF_HSA-NEXT: ret void
@@ -998,18 +998,10 @@ define amdgpu_kernel void @kern_callsite_enqueue_block() {
; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]]
; ATTRIBUTOR_HSA-NEXT: ret void
;
- call void @known_func() #6
+ call void @known_func() "enqueued-block"
ret void
}
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "target-cpu"="gfx900" }
-attributes #3 = { nounwind }
-attributes #4 = { nounwind sanitize_address }
-attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
-attributes #6 = { "enqueued-block" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 43cdf85ed3818c..34267e0b6c1e56 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -4,23 +4,23 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #2
declare i1 @llvm.amdgcn.is.private(ptr nocapture) #2
-define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_tgid_x
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -32,7 +32,7 @@ define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_y
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -50,7 +50,7 @@ define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -74,7 +74,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -98,7 +98,7 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -116,7 +116,7 @@ define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -140,7 +140,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -164,7 +164,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -194,7 +194,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -206,7 +206,7 @@ define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_y
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -224,7 +224,7 @@ define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -242,7 +242,7 @@ define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -258,7 +258,7 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
@@ -282,7 +282,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -312,7 +312,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_all_workitems
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -360,7 +360,7 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
@@ -381,7 +381,7 @@ define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_queue_ptr(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_queue_ptr(ptr addrspace(1) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
@@ -402,7 +402,7 @@ define amdgpu_kernel void @use_queue_ptr(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_kernarg_segment_ptr(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_kernarg_segment_ptr(ptr addrspace(1) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -416,7 +416,7 @@ define amdgpu_kernel void @use_kernarg_segment_ptr(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
@@ -434,7 +434,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
ret void
}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #1 {
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; AKF_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
@@ -452,7 +452,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
ret void
}
-define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
@@ -464,7 +464,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
@@ -477,7 +477,7 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
}
; No-op addrspacecast should not use queue ptr
-define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
@@ -489,7 +489,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
ret void
}
-define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
+define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
@@ -501,7 +501,7 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %
ret void
}
-define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
@@ -513,7 +513,7 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #1 {
+define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) nounwind {
; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; HSA-NEXT: [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(4)
@@ -525,7 +525,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_is_shared(ptr %ptr) #1 {
+define amdgpu_kernel void @use_is_shared(ptr %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_shared
; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR]])
@@ -546,7 +546,7 @@ define amdgpu_kernel void @use_is_shared(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_is_private(ptr %ptr) #1 {
+define amdgpu_kernel void @use_is_private(ptr %ptr) nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_is_private
; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
; AKF_HSA-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
@@ -567,7 +567,7 @@ define amdgpu_kernel void @use_is_private(ptr %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_alloca() #1 {
+define amdgpu_kernel void @use_alloca() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca
; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] {
; AKF_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
@@ -585,7 +585,7 @@ define amdgpu_kernel void @use_alloca() #1 {
ret void
}
-define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
+define amdgpu_kernel void @use_alloca_non_entry_block() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
; AKF_HSA-SAME: () #[[ATTR2]] {
; AKF_HSA-NEXT: entry:
@@ -613,7 +613,7 @@ bb:
ret void
}
-define void @use_alloca_func() #1 {
+define void @use_alloca_func() nounwind {
; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_func
; AKF_HSA-SAME: () #[[ATTR2]] {
; AKF_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
@@ -631,9 +631,6 @@ define void @use_alloca_func() #1 {
ret void
}
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 547ff69592ca0e..3739f8b6e9b4b0 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -2,19 +2,19 @@
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
+declare i32 @llvm.r600.read.tgid.x() nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() nounwind readnone
+declare i32 @llvm.r600.read.tgid.z() nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() nounwind readnone
-define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_tgid_x
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x()
@@ -26,7 +26,7 @@ define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_y
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y()
@@ -44,7 +44,7 @@ define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@multi_use_tgid_y
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
@@ -68,7 +68,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
@@ -92,7 +92,7 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z()
@@ -110,7 +110,7 @@ define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
@@ -134,7 +134,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_y_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
@@ -158,7 +158,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
@@ -188,7 +188,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_tidig_x
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x()
@@ -200,7 +200,7 @@ define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_y
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y()
@@ -218,7 +218,7 @@ define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z()
@@ -236,7 +236,7 @@ define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
@@ -252,7 +252,7 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y()
@@ -276,7 +276,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_x_y_z
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
@@ -306,7 +306,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) nounwind {
; AKF_CHECK-LABEL: define {{[^@]+}}@use_all_workitems
; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; AKF_CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
@@ -354,7 +354,7 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_get_local_size_x(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_get_local_size_x(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_x
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x()
@@ -366,7 +366,7 @@ define amdgpu_kernel void @use_get_local_size_x(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_get_local_size_y(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_get_local_size_y(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_y
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y()
@@ -378,7 +378,7 @@ define amdgpu_kernel void @use_get_local_size_y(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @use_get_local_size_z(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @use_get_local_size_z(ptr addrspace(1) %ptr) nounwind {
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_z
; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z()
@@ -390,9 +390,6 @@ define amdgpu_kernel void @use_get_local_size_z(ptr addrspace(1) %ptr) #1 {
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
;.
; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
index 07e39d798f58d0..cfeb0b3972512c 100644
--- a/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/any_extend_vector_inreg.ll
@@ -24,7 +24,7 @@
; GCN: {{buffer|flat}}_store_byte
; GCN: {{buffer|flat}}_store_byte
; GCN: {{buffer|flat}}_store_byte
-define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) %arg1) local_unnamed_addr #0 {
+define amdgpu_kernel void @any_extend_vector_inreg_v16i8_to_v4i32(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) %arg1) local_unnamed_addr nounwind {
bb:
%tmp2 = load <16 x i8>, ptr addrspace(1) %arg, align 16
%tmp3 = extractelement <16 x i8> %tmp2, i64 4
@@ -50,5 +50,3 @@ bb:
store <16 x i8> %tmp19, ptr addrspace(1) %arg1, align 1
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll
index 897e134ee48d83..a90a96d70d402a 100644
--- a/llvm/test/CodeGen/AMDGPU/anyext.ll
+++ b/llvm/test/CodeGen/AMDGPU/anyext.ll
@@ -6,7 +6,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
+define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) nounwind {
; GCN-LABEL: anyext_i1_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -59,7 +59,7 @@ entry:
ret void
}
-define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
; GCN-LABEL: s_anyext_i16_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -145,7 +145,7 @@ entry:
ret void
}
-define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
+define amdgpu_kernel void @anyext_v2i16_to_v2i32() nounwind {
; GCN-LABEL: anyext_v2i16_to_v2i32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -199,5 +199,3 @@ bb:
store i8 %tmp12, ptr addrspace(1) undef, align 1
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll b/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll
index f15435d9e200fc..fcd69213146e1a 100644
--- a/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll
@@ -7,11 +7,9 @@
; GCN: global_load_dword
; GCN: ds_min_u32
; GCN: ds_max_u32
-define amdgpu_kernel void @are_loads_from_same_base_ptr_ds_atomic(ptr addrspace(1) %arg0, ptr addrspace(3) noalias %ptr0) #0 {
+define amdgpu_kernel void @are_loads_from_same_base_ptr_ds_atomic(ptr addrspace(1) %arg0, ptr addrspace(3) noalias %ptr0) nounwind {
%tmp1 = load volatile i32, ptr addrspace(1) %arg0
%tmp2 = atomicrmw umin ptr addrspace(3) %ptr0, i32 %tmp1 seq_cst
%tmp3 = atomicrmw umax ptr addrspace(3) %ptr0, i32 %tmp1 seq_cst
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index d33196ba33109f..2014c9d2a1b420 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -1,9 +1,9 @@
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-declare void @llvm.amdgcn.s.barrier() #2
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
; The required pointer calculations for the alloca'd actually requires
; an add and won't be folded into the addressing, which fails with a
@@ -24,7 +24,7 @@ declare void @llvm.amdgcn.s.barrier() #2
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
-define amdgpu_kernel void @test_private_array_ptr_calc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) #0 {
+define amdgpu_kernel void @test_private_array_ptr_calc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], align 16, addrspace(5)
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
@@ -43,8 +43,4 @@ define amdgpu_kernel void @test_private_array_ptr_calc(ptr addrspace(1) noalias
ret void
}
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind convergent }
-
!0 = !{i32 0, i32 65536 }
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
index a01dc0294ddc74..d867c9ece66ad9 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
; SI-LABEL: {{^}}test_array_ptr_calc:
; SI-DAG: v_mul_u32_u24
@@ -18,5 +18,3 @@ define amdgpu_kernel void @test_array_ptr_calc(ptr addrspace(1) noalias %out, pt
store i32 %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
index f2794ff9700506..3c972ad58723ab 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -22,7 +22,7 @@
; CIVI: s_and_b32
; CIVI: s_or_b32
-define amdgpu_kernel void @s_ashr_v2i16(ptr addrspace(1) %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_ashr_v2i16(ptr addrspace(1) %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) nounwind {
%result = ashr <2 x i16> %lhs, %rhs
store <2 x i16> %result, ptr addrspace(1) %out
ret void
@@ -44,7 +44,7 @@ define amdgpu_kernel void @s_ashr_v2i16(ptr addrspace(1) %out, i32, <2 x i16> %l
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -61,7 +61,7 @@ define amdgpu_kernel void @v_ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -76,7 +76,7 @@ define amdgpu_kernel void @ashr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -90,7 +90,7 @@ define amdgpu_kernel void @ashr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1
; GCN-LABEL: {{^}}ashr_imm_v_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4
-define amdgpu_kernel void @ashr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @ashr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -104,7 +104,7 @@ define amdgpu_kernel void @ashr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace
; GCN-LABEL: {{^}}ashr_v_imm_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]]
-define amdgpu_kernel void @ashr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -128,7 +128,7 @@ define amdgpu_kernel void @ashr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @v_ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -146,7 +146,7 @@ define amdgpu_kernel void @v_ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -157,7 +157,4 @@ define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index 19a1d2d9dbd1ef..3164de4a3616a4 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1200 %s
-define float @syncscope_system(ptr %addr, float %val) #0 {
+define float @syncscope_system(ptr %addr, float %val) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908-LABEL: syncscope_system:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -119,7 +119,7 @@ define float @syncscope_system(ptr %addr, float %val) #0 {
ret float %res
}
-define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
+define float @syncscope_workgroup_rtn(ptr %addr, float %val) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908-LABEL: syncscope_workgroup_rtn:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -222,7 +222,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
ret float %res
}
-define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
+define void @syncscope_workgroup_nortn(ptr %addr, float %val) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908-LABEL: syncscope_workgroup_nortn:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -462,5 +462,3 @@ define float @no_unsafe(ptr %addr, float %val) {
%res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst
ret float %res
}
-
-attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
index 001a4e999aee98..144f7fa80539a5 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics-hw-remarks-gfx90a.ll
@@ -14,7 +14,7 @@
; GFX90A-HW-LABEL: atomic_add_unsafe_hw:
; GFX90A-HW: ds_add_f64 v2, v[0:1]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw(ptr addrspace(3) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
@@ -23,7 +23,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_agent:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_agent(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_agent(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret void
@@ -32,7 +32,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wg:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_wg(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_wg(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("workgroup") monotonic, align 4
ret void
@@ -41,7 +41,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wavefront:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_wavefront(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_wavefront(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("wavefront") monotonic, align 4
ret void
@@ -50,7 +50,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_single_thread:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_single_thread(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_single_thread(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("singlethread") monotonic, align 4
ret void
@@ -59,7 +59,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_aoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_aoa(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_aoa(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent-one-as") monotonic, align 4
ret void
@@ -68,7 +68,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wgoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_wgoa(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_wgoa(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("workgroup-one-as") monotonic, align 4
ret void
@@ -77,7 +77,7 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_wfoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_wfoa(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_wfoa(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("wavefront-one-as") monotonic, align 4
ret void
@@ -86,10 +86,8 @@ main_body:
; GFX90A-HW-LABEL: atomic_add_unsafe_hw_stoa:
; GFX90A-HW: global_atomic_add_f32 v0, v1, s[4:5]
; GFX90A-HW: s_endpgm
-define amdgpu_kernel void @atomic_add_unsafe_hw_stoa(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_kernel void @atomic_add_unsafe_hw_stoa(ptr addrspace(1) %ptr, float %val) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
main_body:
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("singlethread-one-as") monotonic, align 4
ret void
}
-
-attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index e4d427a0b826f8..801b05ac74d990 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -540,11 +540,10 @@ define internal void @use256vgprs() {
; GFX11WGP-WAVE64: NumVgprs: 256
; GFX11CU-WAVE32: NumVgprs: 256
; GFX11CU-WAVE64: NumVgprs: 256
-define amdgpu_kernel void @f256() #256 {
+define amdgpu_kernel void @f256() nounwind "amdgpu-flat-work-group-size"="256,256" {
call void @use256vgprs()
ret void
}
-attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GCN-LABEL: {{^}}f512:
; GFX9: NumVgprs: 128
@@ -559,12 +558,11 @@ attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GFX11WGP-WAVE64: NumVgprs: 256
; GFX11CU-WAVE32: NumVgprs: 192
; GFX11CU-WAVE64: NumVgprs: 192
-define amdgpu_kernel void @f512() #512 {
+define amdgpu_kernel void @f512() nounwind "amdgpu-flat-work-group-size"="512,512" {
call void @foo()
call void @use256vgprs()
ret void
}
-attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GCN-LABEL: {{^}}f1024:
; GFX9: NumVgprs: 64
@@ -578,12 +576,10 @@ attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GFX11WGP-WAVE64: NumVgprs: 192
; GFX11CU-WAVE32: NumVgprs: 96
; GFX11CU-WAVE64: NumVgprs: 96
-define amdgpu_kernel void @f1024() #1024 {
+define amdgpu_kernel void @f1024() nounwind "amdgpu-flat-work-group-size"="1024,1024" {
call void @foo()
call void @use256vgprs()
ret void
}
-attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
-
declare void @foo()
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
index fc13b86566f76c..d1acbf42de3a29 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
@@ -6,33 +6,30 @@
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_64() #0 {
+define amdgpu_kernel void @min_64_max_64() "amdgpu-flat-work-group-size"="64,64" {
entry:
ret void
}
-attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
; CHECK-LABEL: {{^}}min_64_max_128:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_128() #1 {
+define amdgpu_kernel void @min_64_max_128() "amdgpu-flat-work-group-size"="64,128" {
entry:
ret void
}
-attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
; CHECK-LABEL: {{^}}min_128_max_128:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_128_max_128() #2 {
+define amdgpu_kernel void @min_128_max_128() "amdgpu-flat-work-group-size"="128,128" {
entry:
ret void
}
-attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK-LABEL: {{^}}min_1024_max_1024
; CHECK: SGPRBlocks: 0
@@ -40,7 +37,7 @@ attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
; CHECK: NumVGPRsForWavesPerEU: 43
@var = addrspace(1) global float 0.0
-define amdgpu_kernel void @min_1024_max_1024() #3 {
+define amdgpu_kernel void @min_1024_max_1024() "amdgpu-flat-work-group-size"="1024,1024" {
%val0 = load volatile float, ptr addrspace(1) @var
%val1 = load volatile float, ptr addrspace(1) @var
%val2 = load volatile float, ptr addrspace(1) @var
@@ -127,7 +124,6 @@ define amdgpu_kernel void @min_1024_max_1024() #3 {
ret void
}
-attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
index ed045107d354dc..886f944701b4b0 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -6,7 +6,7 @@
; ALL: SGPRBlocks: 1
; ALL: NumSGPRsForWavesPerEU: 10
-define amdgpu_kernel void @max_10_sgprs() #0 {
+define amdgpu_kernel void @max_10_sgprs() nounwind "amdgpu-num-sgpr"="14" {
%one = load volatile i32, ptr addrspace(4) undef
%two = load volatile i32, ptr addrspace(4) undef
%three = load volatile i32, ptr addrspace(4) undef
@@ -61,7 +61,7 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
; ptr addrspace(1) %out2,
; ptr addrspace(1) %out3,
; ptr addrspace(1) %out4,
-; i32 %one, i32 %two, i32 %three, i32 %four) #2 {
+; i32 %one, i32 %two, i32 %three, i32 %four) nounwind "amdgpu-num-sgpr"="12" {
; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -98,7 +98,7 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
; ptr addrspace(1) %out2,
; ptr addrspace(1) %out3,
; ptr addrspace(1) %out4,
-; i32 %one, i32 %two, i32 %three, i32 %four) #2 {
+; i32 %one, i32 %two, i32 %three, i32 %four) nounwind "amdgpu-num-sgpr"="12" {
; store volatile i32 0, ptr undef
; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x()
; store volatile i32 %x.0, ptr addrspace(1) undef
@@ -118,14 +118,9 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
; ret void
;}
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
-declare i64 @llvm.amdgcn.dispatch.id() #1
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #1
-
-attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }
-attributes #3 = { nounwind "amdgpu-num-sgpr"="11" }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone
+declare i64 @llvm.amdgcn.dispatch.id() nounwind readnone
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone
+declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll
index a1594a83d7dd83..df53c87b8a7f76 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}max_20_vgprs:
; CHECK: VGPRBlocks: 4
; CHECK: NumVGPRsForWavesPerEU: 20
-define amdgpu_kernel void @max_20_vgprs() #1 {
+define amdgpu_kernel void @max_20_vgprs() "amdgpu-num-vgpr"="20" {
%val0 = load volatile float, ptr addrspace(1) @var
%val1 = load volatile float, ptr addrspace(1) @var
%val2 = load volatile float, ptr addrspace(1) @var
@@ -72,4 +72,3 @@ define amdgpu_kernel void @max_20_vgprs() #1 {
ret void
}
-attributes #1 = {"amdgpu-num-vgpr"="20"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
index bc58222076ac0e..823a8bbd5e667a 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups.ll
@@ -9,41 +9,36 @@ entry:
; Ignore if number of work groups for x dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_x0:
-define amdgpu_kernel void @empty_max_num_workgroups_x0() #0 {
+define amdgpu_kernel void @empty_max_num_workgroups_x0() "amdgpu-max-num-workgroups"="0,2,3" {
entry:
ret void
}
-attributes #0 = {"amdgpu-max-num-workgroups"="0,2,3"}
; Ignore if number of work groups for y dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_y0:
-define amdgpu_kernel void @empty_max_num_workgroups_y0() #1 {
+define amdgpu_kernel void @empty_max_num_workgroups_y0() "amdgpu-max-num-workgroups"="1,0,3" {
entry:
ret void
}
-attributes #1 = {"amdgpu-max-num-workgroups"="1,0,3"}
; Ignore if number of work groups for z dimension is 0.
; CHECK-LABEL: {{^}}empty_max_num_workgroups_z0:
-define amdgpu_kernel void @empty_max_num_workgroups_z0() #2 {
+define amdgpu_kernel void @empty_max_num_workgroups_z0() "amdgpu-max-num-workgroups"="1,2,0" {
entry:
ret void
}
-attributes #2 = {"amdgpu-max-num-workgroups"="1,2,0"}
; CHECK-LABEL: {{^}}empty_max_num_workgroups_1_2_3:
-define amdgpu_kernel void @empty_max_num_workgroups_1_2_3() #3 {
+define amdgpu_kernel void @empty_max_num_workgroups_1_2_3() "amdgpu-max-num-workgroups"="1,2,3" {
entry:
ret void
}
-attributes #3 = {"amdgpu-max-num-workgroups"="1,2,3"}
; CHECK-LABEL: {{^}}empty_max_num_workgroups_1024_1024_1024:
-define amdgpu_kernel void @empty_max_num_workgroups_1024_1024_1024() #4 {
+define amdgpu_kernel void @empty_max_num_workgroups_1024_1024_1024() "amdgpu-max-num-workgroups"="1024,1024,1024" {
entry:
ret void
}
-attributes #4 = {"amdgpu-max-num-workgroups"="1024,1024,1024"}
; CHECK: .amdgpu_metadata
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll
index 6d86d2d7c1a343..44b682cd386565 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-workgroups_error_check.ll
@@ -1,71 +1,61 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck --check-prefix=ERROR %s
; ERROR: error: can't parse integer attribute -1 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_neg_num1() #21 {
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num1() "amdgpu-max-num-workgroups"="-1,2,3" {
entry:
ret void
}
-attributes #21 = {"amdgpu-max-num-workgroups"="-1,2,3"}
; ERROR: error: can't parse integer attribute -2 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_neg_num2() #22 {
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num2() "amdgpu-max-num-workgroups"="1,-2,3" {
entry:
ret void
}
-attributes #22 = {"amdgpu-max-num-workgroups"="1,-2,3"}
; ERROR: error: can't parse integer attribute -3 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_neg_num3() #23 {
+define amdgpu_kernel void @empty_max_num_workgroups_neg_num3() "amdgpu-max-num-workgroups"="1,2,-3" {
entry:
ret void
}
-attributes #23 = {"amdgpu-max-num-workgroups"="1,2,-3"}
; ERROR: error: can't parse integer attribute 1.0 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_non_int1() #31 {
+define amdgpu_kernel void @empty_max_num_workgroups_non_int1() "amdgpu-max-num-workgroups"="1.0,2,3" {
entry:
ret void
}
-attributes #31 = {"amdgpu-max-num-workgroups"="1.0,2,3"}
; ERROR: error: can't parse integer attribute 2.0 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_non_int2() #32 {
+define amdgpu_kernel void @empty_max_num_workgroups_non_int2() "amdgpu-max-num-workgroups"="1,2.0,3" {
entry:
ret void
}
-attributes #32 = {"amdgpu-max-num-workgroups"="1,2.0,3"}
; ERROR: error: can't parse integer attribute 3.0 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_non_int3() #33 {
+define amdgpu_kernel void @empty_max_num_workgroups_non_int3() "amdgpu-max-num-workgroups"="1,2,3.0" {
entry:
ret void
}
-attributes #33 = {"amdgpu-max-num-workgroups"="1,2,3.0"}
; ERROR: error: can't parse integer attribute 10000000000 in amdgpu-max-num-workgroups
-define amdgpu_kernel void @empty_max_num_workgroups_too_large() #41 {
+define amdgpu_kernel void @empty_max_num_workgroups_too_large() "amdgpu-max-num-workgroups"="10000000000,2,3" {
entry:
ret void
}
-attributes #41 = {"amdgpu-max-num-workgroups"="10000000000,2,3"}
; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
-define amdgpu_kernel void @empty_max_num_workgroups_1_arg() #51 {
+define amdgpu_kernel void @empty_max_num_workgroups_1_arg() "amdgpu-max-num-workgroups"="1" {
entry:
ret void
}
-attributes #51 = {"amdgpu-max-num-workgroups"="1"}
; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
-define amdgpu_kernel void @empty_max_num_workgroups_2_args() #52 {
+define amdgpu_kernel void @empty_max_num_workgroups_2_args() "amdgpu-max-num-workgroups"="1,2" {
entry:
ret void
}
-attributes #52 = {"amdgpu-max-num-workgroups"="1,2"}
; ERROR: error: attribute amdgpu-max-num-workgroups has incorrect number of integers; expected 3
-define amdgpu_kernel void @empty_max_num_workgroups_4_args() #53 {
+define amdgpu_kernel void @empty_max_num_workgroups_4_args() "amdgpu-max-num-workgroups"="1,2,3,4" {
entry:
ret void
}
-attributes #53 = {"amdgpu-max-num-workgroups"="1,2,3,4"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
index 7da058ca6ee7e7..6e89f8cd3dc1a8 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll
@@ -6,11 +6,10 @@
; CHECK: VGPRBlocks: 32
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 129
-define amdgpu_kernel void @empty_exactly_1() #0 {
+define amdgpu_kernel void @empty_exactly_1() "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,64" {
entry:
ret void
}
-attributes #0 = {"amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,64" }
; Exactly 5 waves per execution unit.
; CHECK-LABEL: {{^}}empty_exactly_5:
@@ -18,11 +17,10 @@ attributes #0 = {"amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,64
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
-define amdgpu_kernel void @empty_exactly_5() #1 {
+define amdgpu_kernel void @empty_exactly_5() "amdgpu-waves-per-eu"="5,5" {
entry:
ret void
}
-attributes #1 = {"amdgpu-waves-per-eu"="5,5"}
; Exactly 10 waves per execution unit.
; CHECK-LABEL: {{^}}empty_exactly_10:
@@ -30,11 +28,10 @@ attributes #1 = {"amdgpu-waves-per-eu"="5,5"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_exactly_10() #2 {
+define amdgpu_kernel void @empty_exactly_10() "amdgpu-waves-per-eu"="10,10" {
entry:
ret void
}
-attributes #2 = {"amdgpu-waves-per-eu"="10,10"}
; At least 1 wave per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_1:
@@ -42,11 +39,10 @@ attributes #2 = {"amdgpu-waves-per-eu"="10,10"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_at_least_1() #3 {
+define amdgpu_kernel void @empty_at_least_1() "amdgpu-waves-per-eu"="1" {
entry:
ret void
}
-attributes #3 = {"amdgpu-waves-per-eu"="1"}
; At least 5 waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_5:
@@ -54,11 +50,10 @@ attributes #3 = {"amdgpu-waves-per-eu"="1"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_at_least_5() #4 {
+define amdgpu_kernel void @empty_at_least_5() "amdgpu-waves-per-eu"="5" {
entry:
ret void
}
-attributes #4 = {"amdgpu-waves-per-eu"="5"}
; At least 10 waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_10:
@@ -66,11 +61,10 @@ attributes #4 = {"amdgpu-waves-per-eu"="5"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_at_least_10() #5 {
+define amdgpu_kernel void @empty_at_least_10() "amdgpu-waves-per-eu"="10" {
entry:
ret void
}
-attributes #5 = {"amdgpu-waves-per-eu"="10"}
; At most 1 wave per execution unit (same as @empty_exactly_1).
@@ -80,11 +74,10 @@ attributes #5 = {"amdgpu-waves-per-eu"="10"}
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
-define amdgpu_kernel void @empty_at_most_5() #6 {
+define amdgpu_kernel void @empty_at_most_5() "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="1,64" {
entry:
ret void
}
-attributes #6 = {"amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="1,64"}
; At most 10 waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_most_10:
@@ -92,11 +85,10 @@ attributes #6 = {"amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="1,64
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_at_most_10() #7 {
+define amdgpu_kernel void @empty_at_most_10() "amdgpu-waves-per-eu"="1,10" {
entry:
ret void
}
-attributes #7 = {"amdgpu-waves-per-eu"="1,10"}
; Between 1 and 5 waves per execution unit (same as @empty_at_most_5).
@@ -106,11 +98,10 @@ attributes #7 = {"amdgpu-waves-per-eu"="1,10"}
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @empty_between_5_and_10() #8 {
+define amdgpu_kernel void @empty_between_5_and_10() "amdgpu-waves-per-eu"="5,10" {
entry:
ret void
}
-attributes #8 = {"amdgpu-waves-per-eu"="5,10"}
@var = addrspace(1) global float 0.0
@@ -120,7 +111,7 @@ attributes #8 = {"amdgpu-waves-per-eu"="5,10"}
; CHECK: VGPRBlocks: 5
; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 24
-define amdgpu_kernel void @exactly_10() #9 {
+define amdgpu_kernel void @exactly_10() "amdgpu-waves-per-eu"="10,10" {
%val0 = load volatile float, ptr addrspace(1) @var
%val1 = load volatile float, ptr addrspace(1) @var
%val2 = load volatile float, ptr addrspace(1) @var
@@ -187,7 +178,6 @@ define amdgpu_kernel void @exactly_10() #9 {
ret void
}
-attributes #9 = {"amdgpu-waves-per-eu"="10,10"}
; Exactly 256 workitems and exactly 2 waves.
; CHECK-LABEL: {{^}}empty_workitems_exactly_256_waves_exactly_2:
@@ -195,8 +185,7 @@ attributes #9 = {"amdgpu-waves-per-eu"="10,10"}
; CHECK: VGPRBlocks: 21
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 85
-define amdgpu_kernel void @empty_workitems_exactly_256_waves_exactly_2() #10 {
+define amdgpu_kernel void @empty_workitems_exactly_256_waves_exactly_2() "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2" {
entry:
ret void
}
-attributes #10 = {"amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll b/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll
index 8eb393f2e634b1..464a8100c5fde1 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll
@@ -1,57 +1,49 @@
; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: cannot parse integer attribute amdgpu-num-sgpr
-define amdgpu_kernel void @unparseable_single_0() #0 {
+define amdgpu_kernel void @unparseable_single_0() "amdgpu-num-sgpr" {
entry:
ret void
}
-attributes #0 = {"amdgpu-num-sgpr"}
; CHECK: cannot parse integer attribute amdgpu-num-sgpr
-define amdgpu_kernel void @unparseable_single_1() #1 {
+define amdgpu_kernel void @unparseable_single_1() "amdgpu-num-sgpr"="k" {
entry:
ret void
}
-attributes #1 = {"amdgpu-num-sgpr"="k"}
; CHECK: cannot parse integer attribute amdgpu-num-sgpr
-define amdgpu_kernel void @unparseable_single_2() #2 {
+define amdgpu_kernel void @unparseable_single_2() "amdgpu-num-sgpr"="1,2" {
entry:
ret void
}
-attributes #2 = {"amdgpu-num-sgpr"="1,2"}
; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
-define amdgpu_kernel void @unparseable_pair_0() #3 {
+define amdgpu_kernel void @unparseable_pair_0() "amdgpu-flat-work-group-size" {
entry:
ret void
}
-attributes #3 = {"amdgpu-flat-work-group-size"}
; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
-define amdgpu_kernel void @unparseable_pair_1() #4 {
+define amdgpu_kernel void @unparseable_pair_1() "amdgpu-flat-work-group-size"="k" {
entry:
ret void
}
-attributes #4 = {"amdgpu-flat-work-group-size"="k"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
-define amdgpu_kernel void @unparseable_pair_2() #5 {
+define amdgpu_kernel void @unparseable_pair_2() "amdgpu-flat-work-group-size"="1" {
entry:
ret void
}
-attributes #5 = {"amdgpu-flat-work-group-size"="1"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
-define amdgpu_kernel void @unparseable_pair_3() #6 {
+define amdgpu_kernel void @unparseable_pair_3() "amdgpu-flat-work-group-size"="1,k" {
entry:
ret void
}
-attributes #6 = {"amdgpu-flat-work-group-size"="1,k"}
; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
-define amdgpu_kernel void @unparseable_pair_4() #7 {
+define amdgpu_kernel void @unparseable_pair_4() "amdgpu-flat-work-group-size"="1,2,3" {
entry:
ret void
}
-attributes #7 = {"amdgpu-flat-work-group-size"="1,2,3"}
diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 632f7dbc53373c..38df1d7d44f52e 100644
--- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -9,7 +9,7 @@
; Subtargets must wait for outstanding memory instructions before a barrier if
; they cannot back off of the barrier.
-define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
+define void @back_off_barrier_no_fence(ptr %in, ptr %out) nounwind {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence:
; GFX9-NO-BACKOFF: ; %bb.0:
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
ret void
}
-define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
+define void @back_off_barrier_with_fence(ptr %in, ptr %out) nounwind {
; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence:
; GFX9-NO-BACKOFF: ; %bb.0:
; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -131,5 +131,3 @@ define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
}
declare void @llvm.amdgcn.s.barrier()
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
index fed4d9b6a373f3..91116ac1c9f030 100644
--- a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
@@ -4,14 +4,14 @@
; CHECK-LABEL: {{^}}unknown_wgs:
; CHECK: s_barrier
define amdgpu_kernel void @unknown_wgs() {
- tail call void @llvm.amdgcn.s.barrier() #0
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
; CHECK: s_barrier
-define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
- tail call void @llvm.amdgcn.s.barrier() #0
+define amdgpu_kernel void @flat_wgs_attr_32_128() nounwind "amdgpu-flat-work-group-size"="32,128" {
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
ret void
}
@@ -19,13 +19,9 @@ define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
; CHECK: :
; CHECK-NEXT: ; wave barrier
; CHECK-NEXT: s_endpgm
-define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
- tail call void @llvm.amdgcn.s.barrier() #0
+define amdgpu_kernel void @flat_wgs_attr_32_64() nounwind "amdgpu-flat-work-group-size"="32,64" {
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
ret void
}
-declare void @llvm.amdgcn.s.barrier() #0
-
-attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
-attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/basic-branch.ll b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
index 2a7206c6ac43f5..6f96c17e57c61d 100644
--- a/llvm/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
@@ -17,7 +17,7 @@
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
-define amdgpu_kernel void @test_branch(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %val) #0 {
+define amdgpu_kernel void @test_branch(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %val) nounwind {
%cmp = icmp ne i32 %val, 0
br i1 %cmp, label %store, label %end
@@ -41,7 +41,7 @@ end:
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
-define amdgpu_kernel void @test_brcc_i1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i1 %val) #0 {
+define amdgpu_kernel void @test_brcc_i1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i1 %val) nounwind {
%cmp0 = icmp ne i1 %val, 0
br i1 %cmp0, label %store, label %end
@@ -52,5 +52,3 @@ store:
end:
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/basic-call-return.ll b/llvm/test/CodeGen/AMDGPU/basic-call-return.ll
index e47e4c1f9caaec..28da4c53dc5a3f 100644
--- a/llvm/test/CodeGen/AMDGPU/basic-call-return.ll
+++ b/llvm/test/CodeGen/AMDGPU/basic-call-return.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define void @void_func_void() #2 {
+define void @void_func_void() nounwind noinline {
ret void
}
@@ -12,8 +12,8 @@ define amdgpu_kernel void @test_call_void_func_void() {
ret void
}
-define void @void_func_void_clobber_s40_s41() #2 {
- call void asm sideeffect "", "~{s[40:41]}"() #0
+define void @void_func_void_clobber_s40_s41() nounwind noinline {
+ call void asm sideeffect "", "~{s[40:41]}"() nounwind
ret void
}
@@ -21,7 +21,3 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s40_s41() {
call void @void_func_void_clobber_s40_s41()
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index af4116bd6aae5d..a610dff9e24e5b 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; SI-LABEL: v_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -52,7 +52,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; SI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -112,7 +112,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) nounwind {
; SI-LABEL: s_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -152,7 +152,7 @@ define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) nounwind {
; SI-LABEL: s_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -200,7 +200,7 @@ define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; SI-LABEL: v_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -250,7 +250,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; SI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -310,7 +310,7 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) nounwind {
; SI-LABEL: s_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -350,7 +350,7 @@ define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) nounwind {
; SI-LABEL: s_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -534,7 +534,4 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out,
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/bfm.ll b/llvm/test/CodeGen/AMDGPU/bfm.ll
index f8bd44b7c98f59..b9df8f9c7c966e 100644
--- a/llvm/test/CodeGen/AMDGPU/bfm.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfm.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) nounwind {
; SI-LABEL: s_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -33,7 +33,7 @@ define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y)
ret void
}
-define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) nounwind {
; SI-LABEL: s_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -63,7 +63,7 @@ define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #
ret void
}
-define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
+define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) nounwind {
; SI-LABEL: v_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -90,7 +90,7 @@ define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
ret void
}
-define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
+define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) nounwind {
; SI-LABEL: v_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -115,5 +115,3 @@ define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
store i32 %b, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 0daa14a63f21aa..5cbdbf6f399737 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -2,7 +2,7 @@
; This test ensures that R600 backend can handle ifcvt properly
-define amdgpu_ps void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
+define amdgpu_ps void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) nounwind {
main_body:
%tmp = extractelement <4 x float> %reg0, i32 0
%tmp1 = extractelement <4 x float> %reg0, i32 1
@@ -1297,14 +1297,11 @@ ENDIF178: ; preds = %IF179, %ENDIF175
ret void
}
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-declare float @llvm.r600.recipsqrt.clamped.f32(float) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
+declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.exp2.f32(float) nounwind readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll b/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
index 58f062ba778bd8..facc0fe0c55563 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}test_to_i16:
; CHECK: s_endpgm
-define amdgpu_ps void @test_to_i16(ptr addrspace(8) inreg, <4 x half> inreg) #0 {
+define amdgpu_ps void @test_to_i16(ptr addrspace(8) inreg, <4 x half> inreg) nounwind memory(argmem: write) {
%a_tmp = call <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half> %1)
%a_i16_tmp = bitcast <4 x half> %a_tmp to <4 x i16>
%a_i16 = call <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16> %a_i16_tmp)
@@ -17,7 +17,7 @@ define amdgpu_ps void @test_to_i16(ptr addrspace(8) inreg, <4 x half> inreg) #0
; CHECK-LABEL: {{^}}test_to_half:
; CHECK: s_endpgm
-define amdgpu_ps void @test_to_half(ptr addrspace(8) inreg, <4 x i16> inreg) #0 {
+define amdgpu_ps void @test_to_half(ptr addrspace(8) inreg, <4 x i16> inreg) nounwind memory(argmem: write) {
%a_tmp = call <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16> %1)
%a_half_tmp = bitcast <4 x i16> %a_tmp to <4 x half>
%a_half = call <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half> %a_half_tmp)
@@ -27,9 +27,6 @@ define amdgpu_ps void @test_to_half(ptr addrspace(8) inreg, <4 x i16> inreg) #0
ret void
}
-declare <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half>) #1
-declare <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16>) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0
-
-attributes #0 = { nounwind memory(argmem: write) }
-attributes #1 = { nounwind readonly }
+declare <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half>) nounwind readonly
+declare <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16>) nounwind readonly
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: write)
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
index 80732d5de1e20e..fb5cce3e85b3ce 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -70,8 +70,8 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
- %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
+ %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) nounwind readnone convergent
%bc = bitcast i64 %undef to <2 x i32>
store volatile <2 x i32> %bc, ptr addrspace(1) %out
ret void
@@ -79,15 +79,12 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrs
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
-define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
- %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
+define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
+ %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) nounwind readnone convergent
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
store volatile i32 %elt1, ptr addrspace(1) %out
ret void
}
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone convergent }
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 64555f14a55cc1..544efc90dec02c 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -6,19 +6,19 @@
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11-FLAT
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -global-isel -verify-machineinstrs | FileCheck %s --check-prefix=GFX11-GISEL
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i16 @llvm.bitreverse.i16(i16) #1
-declare i32 @llvm.bitreverse.i32(i32) #1
-declare i64 @llvm.bitreverse.i64(i64) #1
+declare i16 @llvm.bitreverse.i16(i16) nounwind readnone
+declare i32 @llvm.bitreverse.i32(i32) nounwind readnone
+declare i64 @llvm.bitreverse.i64(i64) nounwind readnone
-declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
-declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
-declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) nounwind readnone
+declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) nounwind readnone
-define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #0 {
+define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) nounwind {
; SI-LABEL: s_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -90,12 +90,12 @@ define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
- %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
+ %brev = call i16 @llvm.bitreverse.i16(i16 %val) nounwind readnone
store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -179,12 +179,12 @@ define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspa
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
%val = load i16, ptr addrspace(1) %valptr
- %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
+ %brev = call i16 @llvm.bitreverse.i16(i16 %val) nounwind readnone
store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) #0 {
+define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -251,12 +251,12 @@ define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) #
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
- %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
+ %brev = call i32 @llvm.bitreverse.i32(i32 %val) nounwind readnone
store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -339,12 +339,12 @@ define amdgpu_kernel void @v_brev_i32(ptr addrspace(1) noalias %out, ptr addrspa
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
%val = load i32, ptr addrspace(1) %gep
- %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
+ %brev = call i32 @llvm.bitreverse.i32(i32 %val) nounwind readnone
store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i32(ptr addrspace(1) noalias %out, <2 x i32> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i32(ptr addrspace(1) noalias %out, <2 x i32> %val) nounwind {
; SI-LABEL: s_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -418,12 +418,12 @@ define amdgpu_kernel void @s_brev_v2i32(ptr addrspace(1) noalias %out, <2 x i32>
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
- %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
+ %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -512,12 +512,12 @@ define amdgpu_kernel void @v_brev_v2i32(ptr addrspace(1) noalias %out, ptr addrs
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
%val = load <2 x i32>, ptr addrspace(1) %gep
- %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
+ %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i64(ptr addrspace(1) noalias %out, i64 %val) #0 {
+define amdgpu_kernel void @s_brev_i64(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -583,12 +583,12 @@ define amdgpu_kernel void @s_brev_i64(ptr addrspace(1) noalias %out, i64 %val) #
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
- %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
+ %brev = call i64 @llvm.bitreverse.i64(i64 %val) nounwind readnone
store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -677,12 +677,12 @@ define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspa
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i64, ptr addrspace(1) %valptr, i32 %tid
%val = load i64, ptr addrspace(1) %gep
- %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
+ %brev = call i64 @llvm.bitreverse.i64(i64 %val) nounwind readnone
store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64> %val) nounwind {
; SI-LABEL: s_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -763,12 +763,12 @@ define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64>
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
- %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
+ %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) nounwind readnone
store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -867,7 +867,7 @@ define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrs
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x i64> , ptr addrspace(1) %valptr, i32 %tid
%val = load <2 x i64>, ptr addrspace(1) %gep
- %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
+ %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) nounwind readnone
store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
@@ -919,6 +919,3 @@ bb:
%tmp3 = fpext half %tmp2 to float
ret float %tmp3
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
index cc05129b1b2af6..57e32b3260288b 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -22,7 +22,7 @@
; GCN: .LBB0_{{[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end
-define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
+define amdgpu_ps void @ham(float %arg, float %arg1) nounwind readonly "InitialPSInputAddr"="36983" {
bb:
%tmp = fcmp ogt float %arg, 0.000000e+00
%tmp2 = fcmp ogt float %arg1, 0.000000e+00
@@ -36,6 +36,3 @@ bb4: ; preds = %bb
bb5: ; preds = %bb
ret void
}
-
-attributes #0 = { nounwind readonly "InitialPSInputAddr"="36983" }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
index 08f19a518b4442..eb099cb1deab4c 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
@@ -15,7 +15,7 @@
; s_setpc_b64
; and some register copies
-declare void @func() #0
+declare void @func() nounwind
; GCN-LABEL: {{^}}bundle_size:
; GCN: s_cbranch_scc0 [[BB_EXPANSION:.LBB[0-9]+_[0-9]+]]
@@ -30,7 +30,7 @@ declare void @func() #0
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@
; GCN: s_swappc_b64
-define amdgpu_kernel void @bundle_size(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @bundle_size(ptr addrspace(1) %arg, i32 %cnd) nounwind {
bb:
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
@@ -42,13 +42,10 @@ bb2:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
store volatile i32 %cnd, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index 6201d7341898f5..499424fc9aa220 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -o - %s | FileCheck %s
-define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) nounwind {
; CHECK-LABEL: spill:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_load_dword s44, s[4:5], 0x2
@@ -666,110 +666,110 @@ define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_endpgm
entry:
- %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
- %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
- %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
- %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
- %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
- %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
- %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
- %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
- %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
- %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
- %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
- %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
- %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
- %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
- %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
- %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
- %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
- %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
- %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
- %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
- %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
- %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
- %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
- %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
- %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
- %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
- %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
- %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
- %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
- %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
- %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
- %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
- %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
- %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
- %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
- %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
- %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
- %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
- %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
- %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
- %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
- %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
- %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
- %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
- %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
- %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
- %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
- %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
- %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
- %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
- %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
- %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
- %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
- %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
- %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
- %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
- %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
- %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
- %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
- %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
- %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
- %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
- %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
- %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
- %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
- %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
- %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
- %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
- %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
- %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
- %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
- %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
- %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
- %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
- %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
- %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
- %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
- %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
- %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
- %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
- %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
- %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
- %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
- %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
- %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
- %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
- %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
- %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
- %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
- %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
- %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
- %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
- %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
- %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
- %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
- %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
- %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
- %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
- %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
- %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
- %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
- %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
- %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() #0
- %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() #0
+ %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() nounwind
+ %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() nounwind
+ %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() nounwind
+ %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() nounwind
+ %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() nounwind
+ %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() nounwind
+ %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() nounwind
+ %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() nounwind
+ %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() nounwind
+ %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() nounwind
+ %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() nounwind
+ %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() nounwind
+ %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() nounwind
+ %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() nounwind
+ %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() nounwind
+ %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() nounwind
+ %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() nounwind
+ %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() nounwind
+ %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() nounwind
+ %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() nounwind
+ %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() nounwind
+ %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() nounwind
+ %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() nounwind
+ %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() nounwind
+ %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() nounwind
+ %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() nounwind
+ %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() nounwind
+ %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() nounwind
+ %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() nounwind
+ %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() nounwind
+ %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() nounwind
+ %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() nounwind
+ %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() nounwind
+ %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() nounwind
+ %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() nounwind
+ %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() nounwind
+ %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() nounwind
+ %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() nounwind
+ %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() nounwind
+ %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() nounwind
+ %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() nounwind
+ %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() nounwind
+ %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() nounwind
+ %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() nounwind
+ %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() nounwind
+ %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() nounwind
+ %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() nounwind
+ %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() nounwind
+ %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() nounwind
+ %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() nounwind
+ %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() nounwind
+ %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() nounwind
+ %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() nounwind
+ %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() nounwind
+ %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() nounwind
+ %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() nounwind
+ %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() nounwind
+ %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() nounwind
+ %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() nounwind
+ %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() nounwind
+ %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() nounwind
+ %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() nounwind
+ %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() nounwind
+ %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() nounwind
+ %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() nounwind
+ %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() nounwind
+ %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() nounwind
+ %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() nounwind
+ %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() nounwind
+ %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() nounwind
+ %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() nounwind
+ %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() nounwind
+ %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() nounwind
+ %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() nounwind
+ %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() nounwind
+ %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() nounwind
+ %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() nounwind
+ %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() nounwind
+ %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() nounwind
+ %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() nounwind
+ %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() nounwind
+ %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() nounwind
+ %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() nounwind
+ %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() nounwind
+ %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() nounwind
+ %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() nounwind
+ %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() nounwind
+ %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() nounwind
+ %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() nounwind
+ %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() nounwind
+ %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() nounwind
+ %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() nounwind
+ %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() nounwind
+ %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() nounwind
+ %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() nounwind
+ %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() nounwind
+ %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() nounwind
+ %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() nounwind
+ %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() nounwind
+ %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() nounwind
+ %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() nounwind
+ %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() nounwind
+ %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() nounwind
+ %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() nounwind
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
@@ -783,118 +783,118 @@ bb2: ; 68 bytes
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64",""() #0
+ v_nop_e64",""() nounwind
br label %bb3
bb3:
- tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
- tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
- tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
- tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
- tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
- tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
- tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
- tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
- tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
- tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
- tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
- tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
- tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
- tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
- tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
- tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
- tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
- tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
- tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
- tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
- tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
- tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
- tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
- tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
- tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
- tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
- tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
- tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
- tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
- tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
- tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
- tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
- tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
- tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
- tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
- tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
- tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
- tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
- tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
- tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
- tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
- tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
- tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
- tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
- tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
- tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
- tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
- tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
- tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
- tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
- tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
- tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
- tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
- tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
- tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
- tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
- tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
- tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
- tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
- tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
- tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
- tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
- tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
- tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
- tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
- tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
- tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
- tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
- tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
- tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
- tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
- tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
- tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
- tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
- tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
- tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
- tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
- tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
- tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
- tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
- tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
- tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
- tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
- tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
- tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
- tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
- tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
- tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
- tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
- tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
- tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
- tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
- tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
- tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
- tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
- tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
- tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
- tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
- tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
- tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
- tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
- tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
- tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
- tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
+ tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) nounwind
+ tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) nounwind
+ tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) nounwind
ret void
}
-define void @spill_func(ptr addrspace(1) %arg) #0 {
+define void @spill_func(ptr addrspace(1) %arg) nounwind {
; CHECK-LABEL: spill_func:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1708,111 +1708,111 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
- %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0
- %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
- %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
- %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
- %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
- %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
- %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
- %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
- %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
- %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
- %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
- %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
- %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
- %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
- %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
- %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
- %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
- %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
- %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
- %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
- %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
- %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
- %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
- %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
- %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
- %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
- %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
- %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
- %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
- %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
- %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
- %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
- %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
- %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
- %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
- %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
- %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
- %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
- %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
- %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
- %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
- %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
- %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
- %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
- %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
- %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
- %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
- %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
- %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
- %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
- %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
- %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
- %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
- %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
- %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
- %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
- %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
- %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
- %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
- %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
- %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
- %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
- %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
- %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
- %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
- %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
- %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
- %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
- %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
- %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
- %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
- %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
- %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
- %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
- %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
- %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
- %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
- %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
- %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
- %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
- %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
- %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
- %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
- %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
- %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
- %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
- %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
- %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
- %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
- %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
- %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
- %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
- %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
- %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
- %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
- %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
- %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
- %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
- %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
- %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
- %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
- %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
- %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
- %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() #0
- %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() #0
+ %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() nounwind
+ %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() nounwind
+ %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() nounwind
+ %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() nounwind
+ %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() nounwind
+ %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() nounwind
+ %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() nounwind
+ %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() nounwind
+ %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() nounwind
+ %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() nounwind
+ %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() nounwind
+ %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() nounwind
+ %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() nounwind
+ %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() nounwind
+ %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() nounwind
+ %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() nounwind
+ %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() nounwind
+ %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() nounwind
+ %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() nounwind
+ %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() nounwind
+ %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() nounwind
+ %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() nounwind
+ %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() nounwind
+ %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() nounwind
+ %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() nounwind
+ %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() nounwind
+ %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() nounwind
+ %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() nounwind
+ %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() nounwind
+ %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() nounwind
+ %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() nounwind
+ %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() nounwind
+ %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() nounwind
+ %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() nounwind
+ %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() nounwind
+ %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() nounwind
+ %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() nounwind
+ %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() nounwind
+ %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() nounwind
+ %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() nounwind
+ %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() nounwind
+ %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() nounwind
+ %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() nounwind
+ %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() nounwind
+ %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() nounwind
+ %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() nounwind
+ %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() nounwind
+ %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() nounwind
+ %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() nounwind
+ %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() nounwind
+ %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() nounwind
+ %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() nounwind
+ %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() nounwind
+ %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() nounwind
+ %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() nounwind
+ %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() nounwind
+ %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() nounwind
+ %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() nounwind
+ %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() nounwind
+ %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() nounwind
+ %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() nounwind
+ %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() nounwind
+ %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() nounwind
+ %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() nounwind
+ %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() nounwind
+ %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() nounwind
+ %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() nounwind
+ %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() nounwind
+ %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() nounwind
+ %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() nounwind
+ %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() nounwind
+ %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() nounwind
+ %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() nounwind
+ %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() nounwind
+ %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() nounwind
+ %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() nounwind
+ %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() nounwind
+ %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() nounwind
+ %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() nounwind
+ %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() nounwind
+ %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() nounwind
+ %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() nounwind
+ %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() nounwind
+ %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() nounwind
+ %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() nounwind
+ %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() nounwind
+ %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() nounwind
+ %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() nounwind
+ %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() nounwind
+ %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() nounwind
+ %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() nounwind
+ %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() nounwind
+ %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() nounwind
+ %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() nounwind
+ %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() nounwind
+ %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() nounwind
+ %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() nounwind
+ %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() nounwind
+ %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() nounwind
+ %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() nounwind
+ %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() nounwind
+ %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() nounwind
+ %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() nounwind
+ %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_lo}"() nounwind
+ %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={vcc_hi}"() nounwind
%cmp = icmp eq i32 %cnd, 0
br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
@@ -1826,117 +1826,115 @@ bb2: ; 68 bytes
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64",""() #0
+ v_nop_e64",""() nounwind
br label %bb3
bb3:
- tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
- tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
- tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
- tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
- tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
- tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
- tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
- tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
- tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
- tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
- tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
- tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
- tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
- tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
- tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
- tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
- tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
- tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
- tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
- tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
- tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
- tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
- tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
- tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
- tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
- tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
- tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
- tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
- tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
- tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
- tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
- tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
- tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
- tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
- tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
- tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
- tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
- tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
- tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
- tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
- tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
- tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
- tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
- tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
- tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
- tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
- tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
- tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
- tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
- tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
- tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
- tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
- tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
- tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
- tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
- tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
- tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
- tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
- tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
- tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
- tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
- tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
- tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
- tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
- tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
- tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
- tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
- tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
- tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
- tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
- tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
- tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
- tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
- tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
- tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
- tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
- tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
- tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
- tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
- tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
- tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
- tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
- tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
- tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
- tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
- tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
- tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
- tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
- tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
- tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
- tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
- tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
- tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
- tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
- tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
- tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
- tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
- tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
- tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
- tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
- tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
- tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
- tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
- tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
+ tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) nounwind
+ tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) nounwind
+ tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) nounwind
+ tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) nounwind
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
index b427b011f50519..bfb47d4b7cf7d6 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
@@ -15,7 +15,7 @@
--- |
- define amdgpu_kernel void @long_branch_dbg_value(ptr addrspace(1) nocapture %arg, float %arg1) #1 !dbg !5 {
+ define amdgpu_kernel void @long_branch_dbg_value(ptr addrspace(1) nocapture %arg, float %arg1) nounwind writeonly !dbg !5 {
bb:
%long_branch_dbg_value.kernarg.segment = call nonnull align 16 dereferenceable(12) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%arg.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %long_branch_dbg_value.kernarg.segment, i64 0
@@ -26,7 +26,7 @@
%arg1.load = load float, ptr addrspace(4) %arg1.kernarg.offset.cast, align 8, !invariant.load !2
%tmp = fmul float %arg1.load, %arg1.load
%tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg.load, i64 3
- call void @llvm.dbg.value(metadata ptr addrspace(1) %tmp2, metadata !11, metadata !DIExpression()) #5, !dbg !12
+ call void @llvm.dbg.value(metadata ptr addrspace(1) %tmp2, metadata !11, metadata !DIExpression()) nounwind, !dbg !12
store float %tmp, ptr addrspace(1) %tmp2, align 4, !dbg !12
%tmp3 = fcmp olt float %tmp, 0x3810000000000000
%tmp3.inv = xor i1 %tmp3, true
@@ -47,15 +47,8 @@
ret void
}
- declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
- declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
- attributes #0 = { nounwind readnone speculatable willreturn }
- attributes #1 = { nounwind writeonly }
- attributes #2 = { nounwind readnone speculatable willreturn }
- attributes #3 = { convergent nounwind willreturn }
- attributes #4 = { convergent nounwind readnone willreturn }
- attributes #5 = { nounwind }
+ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable willreturn
+ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable willreturn
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 2f637df4e93022..16dd54709d7200 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -14,12 +14,12 @@
; Used to emit an always 4 byte instruction. Inline asm always assumes
; each instruction is the maximum size.
-declare void @llvm.amdgcn.s.sleep(i32) #0
+declare void @llvm.amdgcn.s.sleep(i32) nounwind
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) nounwind {
; GCN-LABEL: uniform_conditional_max_short_forward_branch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -51,7 +51,7 @@ bb2:
call void asm sideeffect
"v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
call void @llvm.amdgcn.s.sleep(i32 0)
br label %bb3
@@ -60,7 +60,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) nounwind {
; GCN-LABEL: uniform_conditional_min_long_forward_branch:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -99,7 +99,7 @@ bb2:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -107,7 +107,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) nounwind {
; GCN-LABEL: uniform_conditional_min_long_forward_vcnd_branch:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -147,7 +147,7 @@ bb2:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -155,7 +155,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) nounwind {
; GCN-LABEL: min_long_forward_vbranch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -206,7 +206,7 @@ bb2:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -214,7 +214,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) nounwind {
; GCN-LABEL: long_backward_sbranch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s0, 0
@@ -246,7 +246,7 @@ bb2:
call void asm sideeffect
"v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
%inc = add nsw i32 %loop.idx, 1 ; add cost 4
%cmp = icmp slt i32 %inc, 10 ; condition cost = 8
br i1 %cmp, label %bb2, label %bb3 ; -
@@ -325,7 +325,7 @@ bb3:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb4
bb4:
@@ -365,14 +365,14 @@ loop:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %loop
}
; Expansion of branch from %bb1 to %bb3 introduces need to expand
; branch from %bb0 to %bb2
-define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 {
+define amdgpu_kernel void @expand_requires_expand(i32 %cond0) nounwind {
; GCN-LABEL: expand_requires_expand:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
@@ -411,7 +411,7 @@ define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 {
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
bb0:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%cmp0 = icmp slt i32 %cond0, 0
br i1 %cmp0, label %bb2, label %bb1
@@ -425,22 +425,22 @@ bb2:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
; These NOPs prevent tail-duplication-based outlining
; from firing, which defeats the need to expand the branches and this test.
call void asm sideeffect
- "v_nop_e64", ""() #0
+ "v_nop_e64", ""() nounwind
call void asm sideeffect
- "v_nop_e64", ""() #0
+ "v_nop_e64", ""() nounwind
ret void
}
; Requires expanding of required skip branch.
-define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) #0 {
+define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) nounwind {
; GCN-LABEL: uniform_inside_divergent:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
@@ -493,7 +493,7 @@ endif:
; si_mask_branch
-define amdgpu_kernel void @analyze_mask_branch() #0 {
+define amdgpu_kernel void @analyze_mask_branch() nounwind {
; GCN-LABEL: analyze_mask_branch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: ;;#ASMSTART
@@ -552,7 +552,7 @@ loop:
%phi = phi float [ 0.000000e+00, %loop_body ], [ 1.000000e+00, %entry ]
call void asm sideeffect
"v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
%cmp1 = fcmp olt float %phi, 8.0
br i1 %cmp1, label %loop_body, label %ret
@@ -561,7 +561,7 @@ loop_body:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %loop
ret:
@@ -569,7 +569,7 @@ ret:
ret void
}
-define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i64 %arg5) #0 {
+define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i64 %arg5) nounwind {
; GCN-LABEL: long_branch_hang:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -651,7 +651,7 @@ bb13: ; preds = %bb
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br i1 %tmp6, label %bb19, label %bb14
bb14: ; preds = %bb13, %bb9
@@ -667,6 +667,3 @@ bb19: ; preds = %bb14, %bb13, %bb9
store i32 %tmp20, ptr addrspace(1) %tmp21, align 4
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
index f4cd19a2ffa802..c55ea11938fa07 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -324,315 +324,307 @@ define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) der
bb.0:
%tmp0 = load <4 x i32>, ptr addrspace(6) %arg0, align 16, !invariant.load !0
%tmp1 = load ptr addrspace(8), ptr addrspace(6) %arg0, align 16, !invariant.load !0
- %buffer0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 16, i1 false, i1 false) #0
- %buffer1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0
- %buffer2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 16, i1 false, i1 false) #0
- %buffer3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 16, i1 false, i1 false) #0
+ %buffer0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 16, i1 false, i1 false) nounwind readonly
+ %buffer1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) nounwind readonly
+ %buffer2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 16, i1 false, i1 false) nounwind readonly
+ %buffer3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 16, i1 false, i1 false) nounwind readonly
; Insert inline asm to keep the different instruction types from being mixed. This makes the output easier to read.
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer0, <4 x i32> %tmp0, i32 0, i32 32, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer2, <4 x i32> %tmp0, i32 1, i32 32, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer3, <4 x i32> %tmp0, i32 %arg1, i32 32, i1 false, i1 false) #1
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer0, <4 x i32> %tmp0, i32 0, i32 32, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer2, <4 x i32> %tmp0, i32 1, i32 32, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer3, <4 x i32> %tmp0, i32 %arg1, i32 32, i1 false, i1 false) nounwind writeonly
call void asm sideeffect "", "" ()
- %buffer_format0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 48, i1 false, i1 false) #0
- %buffer_format1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0
- %buffer_format2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 48, i1 false, i1 false) #0
- %buffer_format3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 48, i1 false, i1 false) #0
+ %buffer_format0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 48, i1 false, i1 false) nounwind readonly
+ %buffer_format1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) nounwind readonly
+ %buffer_format2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 48, i1 false, i1 false) nounwind readonly
+ %buffer_format3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 48, i1 false, i1 false) nounwind readonly
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format0, <4 x i32> %tmp0, i32 0, i32 64, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format2, <4 x i32> %tmp0, i32 1, i32 64, i1 false, i1 false) #1
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 64, i1 false, i1 false) #1
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format0, <4 x i32> %tmp0, i32 0, i32 64, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format2, <4 x i32> %tmp0, i32 1, i32 64, i1 false, i1 false) nounwind writeonly
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 64, i1 false, i1 false) nounwind writeonly
call void asm sideeffect "", "" ()
- %atomic_add0 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 80, i1 false) #2
- %atomic_add1 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
- %atomic_add2 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 80, i1 false) #2
- %atomic_add3 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 80, i1 false) #2
+ %atomic_add0 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 80, i1 false) nounwind
+ %atomic_add1 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) nounwind
+ %atomic_add2 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 80, i1 false) nounwind
+ %atomic_add3 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 80, i1 false) nounwind
call void asm sideeffect "", "" ()
- %atomic_cmpswap0 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 96, i1 false) #2
- %atomic_cmpswap1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
- %atomic_cmpswap2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 96, i1 false) #2
- %atomic_cmpswap3 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 96, i1 false) #2
+ %atomic_cmpswap0 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 96, i1 false) nounwind
+ %atomic_cmpswap1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) nounwind
+ %atomic_cmpswap2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 96, i1 false) nounwind
+ %atomic_cmpswap3 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 96, i1 false) nounwind
call void asm sideeffect "", "" ()
- %fadd1 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) #2
- %fadd2 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
- %fadd3 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) #2
- %fadd4 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) #2
+ %fadd1 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) nounwind
+ %fadd2 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) nounwind
+ %fadd3 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) nounwind
+ %fadd4 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) nounwind
call void asm sideeffect "", "" ()
; rsrc, offset, soffset, cachepolicy
- %raw_buffer0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 0, i32 0) #0
- %raw_buffer1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 64, i32 64, i32 0) #0
- %raw_buffer2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 128, i32 0) #0
- %raw_buffer3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 128, i32 0) #0
- %raw_buffer4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 %arg1, i32 0) #0
+ %raw_buffer0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 0, i32 0) nounwind readonly
+ %raw_buffer1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 64, i32 64, i32 0) nounwind readonly
+ %raw_buffer2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 128, i32 0) nounwind readonly
+ %raw_buffer3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 128, i32 0) nounwind readonly
+ %raw_buffer4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 %arg1, i32 0) nounwind readonly
call void asm sideeffect "", "" ()
; rsrc, offset, soffset, cachepolicy
- %raw_ptr_buffer0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 128, i32 0, i32 0) #3
- %raw_ptr_buffer1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 64, i32 64, i32 0) #3
- %raw_ptr_buffer2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 128, i32 0) #3
- %raw_ptr_buffer3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 128, i32 0) #3
- %raw_ptr_buffer4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 128, i32 %arg1, i32 0) #3
+ %raw_ptr_buffer0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 128, i32 0, i32 0) nounwind memory(argmem: read)
+ %raw_ptr_buffer1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 64, i32 64, i32 0) nounwind memory(argmem: read)
+ %raw_ptr_buffer2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 128, i32 0) nounwind memory(argmem: read)
+ %raw_ptr_buffer3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 128, i32 0) nounwind memory(argmem: read)
+ %raw_ptr_buffer4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 128, i32 %arg1, i32 0) nounwind memory(argmem: read)
call void asm sideeffect "", "" ()
- %raw_buffer_format0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 0, i32 0) #0
- %raw_buffer_format1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 72, i32 72, i32 0) #0
- %raw_buffer_format2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 144, i32 0) #0
- %raw_buffer_format3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 144, i32 0) #0
- %raw_buffer_format4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 %arg1, i32 0) #0
+ %raw_buffer_format0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 0, i32 0) nounwind readonly
+ %raw_buffer_format1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 72, i32 72, i32 0) nounwind readonly
+ %raw_buffer_format2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 144, i32 0) nounwind readonly
+ %raw_buffer_format3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 144, i32 0) nounwind readonly
+ %raw_buffer_format4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 %arg1, i32 0) nounwind readonly
call void asm sideeffect "", "" ()
- %raw_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 0, i32 0) #3
- %raw_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 72, i32 72, i32 0) #3
- %raw_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 144, i32 0) #3
- %raw_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 144, i32 0) #3
- %raw_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 %arg1, i32 0) #3
+ %raw_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 0, i32 0) nounwind memory(argmem: read)
+ %raw_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 72, i32 72, i32 0) nounwind memory(argmem: read)
+ %raw_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 144, i32 0) nounwind memory(argmem: read)
+ %raw_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 144, i32 0) nounwind memory(argmem: read)
+ %raw_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 144, i32 %arg1, i32 0) nounwind memory(argmem: read)
call void asm sideeffect "", "" ()
- %raw_atomic_add0 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 0, i32 0) #2
- %raw_atomic_add1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 80, i32 80, i32 0) #2
- %raw_atomic_add2 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 160, i32 0) #2
- %raw_atomic_add3 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 160, i32 0) #2
- %raw_atomic_add4 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 %arg1, i32 0) #2
+ %raw_atomic_add0 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 0, i32 0) nounwind
+ %raw_atomic_add1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 80, i32 80, i32 0) nounwind
+ %raw_atomic_add2 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 160, i32 0) nounwind
+ %raw_atomic_add3 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 160, i32 0) nounwind
+ %raw_atomic_add4 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 %arg1, i32 0) nounwind
call void asm sideeffect "", "" ()
- %raw_ptr_atomic_add0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 0, i32 0) #5
- %raw_ptr_atomic_add1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 80, i32 80, i32 0) #5
- %raw_ptr_atomic_add2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 160, i32 0) #5
- %raw_ptr_atomic_add3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 160, i32 0) #5
- %raw_ptr_atomic_add4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 %arg1, i32 0) #5
+ %raw_ptr_atomic_add0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_add1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 80, i32 80, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_add2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 160, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_add3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 160, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_add4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 160, i32 %arg1, i32 0) nounwind memory(argmem: readwrite)
call void asm sideeffect "", "" ()
- %raw_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 0, i32 0) #2
- %raw_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 88, i32 88, i32 0) #2
- %raw_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 176, i32 0) #2
- %raw_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 176, i32 0) #2
- %raw_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 %arg1, i32 0) #2
+ %raw_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 0, i32 0) nounwind
+ %raw_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 88, i32 88, i32 0) nounwind
+ %raw_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 176, i32 0) nounwind
+ %raw_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 176, i32 0) nounwind
+ %raw_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 %arg1, i32 0) nounwind
call void asm sideeffect "", "" ()
- %raw_ptr_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 0, i32 0) #5
- %raw_ptr_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 88, i32 88, i32 0) #5
- %raw_ptr_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 176, i32 0) #5
- %raw_ptr_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 176, i32 0) #5
- %raw_ptr_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 %arg1, i32 0) #5
+ %raw_ptr_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 88, i32 88, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 176, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 176, i32 0) nounwind memory(argmem: readwrite)
+ %raw_ptr_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 176, i32 %arg1, i32 0) nounwind memory(argmem: readwrite)
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer0, <4 x i32> %tmp0, i32 192, i32 0, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer1, <4 x i32> %tmp0, i32 96, i32 96, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer2, <4 x i32> %tmp0, i32 0, i32 192, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer3, <4 x i32> %tmp0, i32 %arg1, i32 192, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer4, <4 x i32> %tmp0, i32 192, i32 %arg1, i32 0) #2
+ call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer0, <4 x i32> %tmp0, i32 192, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer1, <4 x i32> %tmp0, i32 96, i32 96, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer2, <4 x i32> %tmp0, i32 0, i32 192, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer3, <4 x i32> %tmp0, i32 %arg1, i32 192, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer4, <4 x i32> %tmp0, i32 192, i32 %arg1, i32 0) nounwind
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer0, ptr addrspace(8) %tmp1, i32 192, i32 0, i32 0) #5
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer1, ptr addrspace(8) %tmp1, i32 96, i32 96, i32 0) #5
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 192, i32 0) #5
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer3, ptr addrspace(8) %tmp1, i32 %arg1, i32 192, i32 0) #5
- call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer4, ptr addrspace(8) %tmp1, i32 192, i32 %arg1, i32 0) #5
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer0, ptr addrspace(8) %tmp1, i32 192, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer1, ptr addrspace(8) %tmp1, i32 96, i32 96, i32 0) nounwind memory(argmem: readwrite)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 192, i32 0) nounwind memory(argmem: readwrite)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer3, ptr addrspace(8) %tmp1, i32 %arg1, i32 192, i32 0) nounwind memory(argmem: readwrite)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %raw_ptr_buffer4, ptr addrspace(8) %tmp1, i32 192, i32 %arg1, i32 0) nounwind memory(argmem: readwrite)
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format0, <4 x i32> %tmp0, i32 208, i32 0, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format1, <4 x i32> %tmp0, i32 104, i32 104, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format2, <4 x i32> %tmp0, i32 0, i32 208, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 208, i32 0) #2
- call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format4, <4 x i32> %tmp0, i32 208, i32 %arg1, i32 0) #2
+ call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format0, <4 x i32> %tmp0, i32 208, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format1, <4 x i32> %tmp0, i32 104, i32 104, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format2, <4 x i32> %tmp0, i32 0, i32 208, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 208, i32 0) nounwind
+ call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format4, <4 x i32> %tmp0, i32 208, i32 %arg1, i32 0) nounwind
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 208, i32 0, i32 0) #4
- call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 104, i32 104, i32 0) #4
- call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 208, i32 0) #4
- call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 %arg1, i32 208, i32 0) #4
- call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 208, i32 %arg1, i32 0) #4
+ call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 208, i32 0, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 104, i32 104, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 208, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 %arg1, i32 208, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %raw_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 208, i32 %arg1, i32 0) nounwind memory(argmem: write)
call void asm sideeffect "", "" ()
; rsrc, vindex, offset, soffset, cachepolicy
- %struct_buffer0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 0, i32 0) #0
- %struct_buffer1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 112, i32 112, i32 0) #0
- %struct_buffer2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 224, i32 0) #0
- %struct_buffer3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 224, i32 0) #0
- %struct_buffer4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 %arg1, i32 0) #0
- %struct_buffer5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 224, i32 0, i32 0) #0
- %struct_buffer6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 224, i32 0, i32 0) #0
+ %struct_buffer0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 0, i32 0) nounwind readonly
+ %struct_buffer1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 112, i32 112, i32 0) nounwind readonly
+ %struct_buffer2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 224, i32 0) nounwind readonly
+ %struct_buffer3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 224, i32 0) nounwind readonly
+ %struct_buffer4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 %arg1, i32 0) nounwind readonly
+ %struct_buffer5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 224, i32 0, i32 0) nounwind readonly
+ %struct_buffer6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 224, i32 0, i32 0) nounwind readonly
call void asm sideeffect "", "" ()
- %struct_ptr_buffer0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 0, i32 0) #3
- %struct_ptr_buffer1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 112, i32 112, i32 0) #3
- %struct_ptr_buffer2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 224, i32 0) #3
- %struct_ptr_buffer3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 224, i32 0) #3
- %struct_ptr_buffer4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 %arg1, i32 0) #3
- %struct_ptr_buffer5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 224, i32 0, i32 0) #3
- %struct_ptr_buffer6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 224, i32 0, i32 0) #3
+ %struct_ptr_buffer0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 0, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 112, i32 112, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 224, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 224, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 224, i32 %arg1, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 224, i32 0, i32 0) nounwind memory(argmem: read)
+ %struct_ptr_buffer6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 224, i32 0, i32 0) nounwind memory(argmem: read)
call void asm sideeffect "", "" ()
- %struct_buffer_format0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 0, i32 0) #0
- %struct_buffer_format1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 120, i32 120, i32 0) #0
- %struct_buffer_format2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 240, i32 0) #0
- %struct_buffer_format3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 240, i32 0) #0
- %struct_buffer_format4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 %arg1, i32 0) #0
- %struct_buffer_format5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 240, i32 0, i32 0) #0
- %struct_buffer_format6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 240, i32 0, i32 0) #0
+ %struct_buffer_format0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 0, i32 0) nounwind readonly
+ %struct_buffer_format1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 120, i32 120, i32 0) nounwind readonly
+ %struct_buffer_format2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 240, i32 0) nounwind readonly
+ %struct_buffer_format3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 240, i32 0) nounwind readonly
+ %struct_buffer_format4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 %arg1, i32 0) nounwind readonly
+ %struct_buffer_format5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 240, i32 0, i32 0) nounwind readonly
+ %struct_buffer_format6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 240, i32 0, i32 0) nounwind readonly
call void asm sideeffect "", "" ()
- %struct_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 0, i32 0) #3
- %struct_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 120, i32 120, i32 0) #3
- %struct_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 240, i32 0) #3
- %struct_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 240, i32 0) #3
- %struct_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 %arg1, i32 0) #3
- %struct_buffer_format_ptr5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 240, i32 0, i32 0) #3
- %struct_buffer_format_ptr6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 240, i32 0, i32 0) #3
+ %struct_buffer_format_ptr0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 0, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 120, i32 120, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 0, i32 240, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr3 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 240, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr4 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 0, i32 240, i32 %arg1, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr5 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 1, i32 240, i32 0, i32 0) nounwind memory(argmem: read)
+ %struct_buffer_format_ptr6 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp1, i32 %arg1, i32 240, i32 0, i32 0) nounwind memory(argmem: read)
call void asm sideeffect "", "" ()
- %struct_atomic_add0 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 0, i32 0) #2
- %struct_atomic_add1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 128, i32 128, i32 0) #2
- %struct_atomic_add2 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 256, i32 0) #2
- %struct_atomic_add3 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 256, i32 0) #2
- %struct_atomic_add4 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 %arg1, i32 0) #2
- %struct_atomic_add5 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 256, i32 0, i32 0) #2
- %struct_atomic_add6 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 256, i32 0, i32 0) #2
+ %struct_atomic_add0 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 0, i32 0) nounwind
+ %struct_atomic_add1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 128, i32 128, i32 0) nounwind
+ %struct_atomic_add2 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 256, i32 0) nounwind
+ %struct_atomic_add3 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 256, i32 0) nounwind
+ %struct_atomic_add4 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 %arg1, i32 0) nounwind
+ %struct_atomic_add5 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 256, i32 0, i32 0) nounwind
+ %struct_atomic_add6 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 256, i32 0, i32 0) nounwind
call void asm sideeffect "", "" ()
- %struct_atomic_add_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 0, i32 0) #5
- %struct_atomic_add_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 128, i32 128, i32 0) #5
- %struct_atomic_add_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 256, i32 0) #5
- %struct_atomic_add_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 256, i32 0) #5
- %struct_atomic_add_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 %arg1, i32 0) #5
- %struct_atomic_add_ptr5 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 256, i32 0, i32 0) #5
- %struct_atomic_add_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 256, i32 0, i32 0) #5
+ %struct_atomic_add_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 128, i32 128, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 256, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 256, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 256, i32 %arg1, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr5 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 256, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_add_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 256, i32 0, i32 0) nounwind memory(argmem: readwrite)
call void asm sideeffect "", "" ()
- %struct_atomic_cmpswap0 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 0, i32 0) #2
- %struct_atomic_cmpswap1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 136, i32 136, i32 0) #2
- %struct_atomic_cmpswap2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 272, i32 0) #2
- %struct_atomic_cmpswap3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 272, i32 0) #2
- %struct_atomic_cmpswap4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 %arg1, i32 0) #2
- %struct_atomic_cmpswap5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 272, i32 0, i32 0) #2
- %struct_atomic_cmpswap6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 272, i32 0, i32 0) #2
+ %struct_atomic_cmpswap0 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 0, i32 0) nounwind
+ %struct_atomic_cmpswap1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 136, i32 136, i32 0) nounwind
+ %struct_atomic_cmpswap2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 272, i32 0) nounwind
+ %struct_atomic_cmpswap3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 272, i32 0) nounwind
+ %struct_atomic_cmpswap4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 %arg1, i32 0) nounwind
+ %struct_atomic_cmpswap5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 272, i32 0, i32 0) nounwind
+ %struct_atomic_cmpswap6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 272, i32 0, i32 0) nounwind
call void asm sideeffect "", "" ()
- %struct_atomic_cmpswap_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 0, i32 0) #5
- %struct_atomic_cmpswap_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 136, i32 136, i32 0) #5
- %struct_atomic_cmpswap_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 272, i32 0) #5
- %struct_atomic_cmpswap_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 272, i32 0) #5
- %struct_atomic_cmpswap_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 %arg1, i32 0) #5
- %struct_atomic_cmpswap_ptr5 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 272, i32 0, i32 0) #5
- %struct_atomic_cmpswap_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 272, i32 0, i32 0) #5
+ %struct_atomic_cmpswap_ptr0 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr1 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 136, i32 136, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr2 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 272, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr3 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 272, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr4 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 0, i32 272, i32 %arg1, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr5 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 1, i32 272, i32 0, i32 0) nounwind memory(argmem: readwrite)
+ %struct_atomic_cmpswap_ptr6 = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, ptr addrspace(8) %tmp1, i32 %arg1, i32 272, i32 0, i32 0) nounwind memory(argmem: readwrite)
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer0, <4 x i32> %tmp0, i32 0, i32 288, i32 0, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer1, <4 x i32> %tmp0, i32 0, i32 144, i32 144, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer2, <4 x i32> %tmp0, i32 0, i32 0, i32 288, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 288, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer4, <4 x i32> %tmp0, i32 0, i32 288, i32 %arg1, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer5, <4 x i32> %tmp0, i32 1, i32 288, i32 0, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer6, <4 x i32> %tmp0, i32 %arg1, i32 288, i32 0, i32 0) #2
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer0, <4 x i32> %tmp0, i32 0, i32 288, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer1, <4 x i32> %tmp0, i32 0, i32 144, i32 144, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer2, <4 x i32> %tmp0, i32 0, i32 0, i32 288, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 288, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer4, <4 x i32> %tmp0, i32 0, i32 288, i32 %arg1, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer5, <4 x i32> %tmp0, i32 1, i32 288, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer6, <4 x i32> %tmp0, i32 %arg1, i32 288, i32 0, i32 0) nounwind
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer0, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 0, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer1, ptr addrspace(8) %tmp1, i32 0, i32 144, i32 144, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 288, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 288, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer4, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 %arg1, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer5, ptr addrspace(8) %tmp1, i32 1, i32 288, i32 0, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer6, ptr addrspace(8) %tmp1, i32 %arg1, i32 288, i32 0, i32 0) #4
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer0, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 0, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer1, ptr addrspace(8) %tmp1, i32 0, i32 144, i32 144, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 288, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 288, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer4, ptr addrspace(8) %tmp1, i32 0, i32 288, i32 %arg1, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer5, ptr addrspace(8) %tmp1, i32 1, i32 288, i32 0, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> %struct_ptr_buffer6, ptr addrspace(8) %tmp1, i32 %arg1, i32 288, i32 0, i32 0) nounwind memory(argmem: write)
call void asm sideeffect "", "" ()
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format0, <4 x i32> %tmp0, i32 0, i32 304, i32 0, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format1, <4 x i32> %tmp0, i32 0, i32 152, i32 152, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format2, <4 x i32> %tmp0, i32 0, i32 0, i32 304, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 304, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format4, <4 x i32> %tmp0, i32 0, i32 304, i32 %arg1, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format5, <4 x i32> %tmp0, i32 1, i32 304, i32 0, i32 0) #2
- call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format6, <4 x i32> %tmp0, i32 %arg1, i32 304, i32 0, i32 0) #2
-
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 0, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 0, i32 152, i32 152, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 304, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 304, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 %arg1, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr5, ptr addrspace(8) %tmp1, i32 1, i32 304, i32 0, i32 0) #4
- call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr6, ptr addrspace(8) %tmp1, i32 %arg1, i32 304, i32 0, i32 0) #4
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format0, <4 x i32> %tmp0, i32 0, i32 304, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format1, <4 x i32> %tmp0, i32 0, i32 152, i32 152, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format2, <4 x i32> %tmp0, i32 0, i32 0, i32 304, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 304, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format4, <4 x i32> %tmp0, i32 0, i32 304, i32 %arg1, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format5, <4 x i32> %tmp0, i32 1, i32 304, i32 0, i32 0) nounwind
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format6, <4 x i32> %tmp0, i32 %arg1, i32 304, i32 0, i32 0) nounwind
+
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr0, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 0, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr1, ptr addrspace(8) %tmp1, i32 0, i32 152, i32 152, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr2, ptr addrspace(8) %tmp1, i32 0, i32 0, i32 304, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr3, ptr addrspace(8) %tmp1, i32 0, i32 %arg1, i32 304, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr4, ptr addrspace(8) %tmp1, i32 0, i32 304, i32 %arg1, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr5, ptr addrspace(8) %tmp1, i32 1, i32 304, i32 0, i32 0) nounwind memory(argmem: write)
+ call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float> %struct_buffer_format_ptr6, ptr addrspace(8) %tmp1, i32 %arg1, i32 304, i32 0, i32 0) nounwind memory(argmem: write)
ret void
}
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #2
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #2
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #3
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #5
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #5
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4
-
-
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #2
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #5
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #5
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4
-
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind writeonly }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind memory(argmem: read) }
-attributes #4 = { nounwind memory(argmem: write) }
-attributes #5 = { nounwind memory(argmem: readwrite) }
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) nounwind writeonly
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) nounwind writeonly
+declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) nounwind
+declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) nounwind
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: read)
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: read)
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: readwrite)
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: readwrite)
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: write)
+declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) nounwind memory(argmem: write)
+
+
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: read)
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: read)
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: readwrite)
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: readwrite)
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: write)
+declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: write)
!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
index bdc73e5b997205..2bb90e3c0f1986 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
@@ -84,19 +84,14 @@ define amdgpu_cs void @test1_ptrs_reorderable(ptr addrspace(8) inreg %buf, i32 %
}
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind writeonly
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) nounwind readonly
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) nounwind writeonly
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) #4
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) nounwind memory(argmem: read)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) #5
-
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind writeonly }
-attributes #4 = { nounwind memory(argmem: read) }
-attributes #5 = { nounwind memory(argmem: write) }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) nounwind memory(argmem: write)
diff --git a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
index 98590c227859b0..7aa5f67cac8894 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
@@ -6,7 +6,7 @@
; leading to an effectively dead INSERT_SUBREG.
-define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 %descTable2) #0 {
+define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 %descTable2) "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i2 = zext i32 %descTable2 to i64
%i4 = inttoptr i64 %i2 to ptr addrspace(4)
@@ -86,5 +86,3 @@ declare float @llvm.amdgcn.fmed3.f32(float, float, float)
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
-
-attributes #0 = { "target-features"=",+wavefrontsize64,+cumode" }
diff --git a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
index 0784d13e588d43..119b326bf82d0a 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
@@ -41,7 +41,4 @@ ENDIF62: ; preds = %ENDIF59
}
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { readnone }
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-insert-elt-infloop.ll b/llvm/test/CodeGen/AMDGPU/build-vector-insert-elt-infloop.ll
index efe8f9303e2dda..354c1332e7a468 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-insert-elt-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-insert-elt-infloop.ll
@@ -6,7 +6,7 @@
; GCN-LABEL: {{^}}combine_loop:
; GCN: flat_load_short_d16_hi
; GCN: flat_store_short
-define amdgpu_kernel void @combine_loop(ptr %arg) #0 {
+define amdgpu_kernel void @combine_loop(ptr %arg) nounwind {
bb:
br label %bb1
@@ -21,5 +21,3 @@ bb1:
%tmp8 = extractelement <2 x half> %tmp7, i32 0
br label %bb1
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index 1f0e09371d6d5d..92b8409d71a835 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -23,7 +23,7 @@
; GCN: [[BB1]]
; GCN: s_or_b64 exec, exec
-define hidden void @void_func_byval_struct_use_outside_entry_block(ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg0, ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg1, i1 %cond) #1 {
+define hidden void @void_func_byval_struct_use_outside_entry_block(ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg0, ptr addrspace(5) byval(%struct.ByValStruct) noalias nocapture align 4 %arg1, i1 %cond) noinline norecurse nounwind {
entry:
br i1 %cond, label %bb0, label %bb1
@@ -40,11 +40,7 @@ bb0:
bb1:
ret void
}
-declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_void() nounwind
declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #3
declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #3
-
-attributes #0 = { nounwind }
-attributes #1 = { noinline norecurse nounwind }
-attributes #2 = { nounwind norecurse "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index a795e995603410..84840aa7d68809 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -14,21 +14,17 @@
; GFX90A: .amdhsa_next_free_vgpr 59
; GFX90A-NEXT: .amdhsa_next_free_sgpr 33
; GFX90A-NEXT: .amdhsa_accum_offset 32
-define amdgpu_kernel void @kernel() #0 {
+define amdgpu_kernel void @kernel() noinline norecurse nounwind optnone {
bb:
- call void @alias() #2
+ call void @alias() nounwind readnone willreturn
ret void
}
-define internal void @aliasee_default() #1 {
+define internal void @aliasee_default() noinline norecurse nounwind readnone willreturn {
bb:
call void asm sideeffect "; clobber a26 ", "~{a26}"()
ret void
}
-attributes #0 = { noinline norecurse nounwind optnone }
-attributes #1 = { noinline norecurse nounwind readnone willreturn }
-attributes #2 = { nounwind readnone willreturn }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index c976cc3d53b5eb..e81793bfb90229 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -9,21 +9,17 @@
; CHECK-LABEL: {{^}}kernel0:
; CHECK: .amdhsa_next_free_vgpr 53
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
-define amdgpu_kernel void @kernel0() #0 {
+define amdgpu_kernel void @kernel0() noinline norecurse nounwind optnone {
bb:
- call void @alias0() #2
+ call void @alias0() nounwind readnone willreturn
ret void
}
-define internal void @aliasee_default_vgpr64_sgpr102() #1 {
+define internal void @aliasee_default_vgpr64_sgpr102() noinline norecurse nounwind readnone willreturn {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
ret void
}
-attributes #0 = { noinline norecurse nounwind optnone }
-attributes #1 = { noinline norecurse nounwind readnone willreturn }
-attributes #2 = { nounwind readnone willreturn }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index edef71ef143dfd..1961bf57ffc663 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -11,22 +11,18 @@
; CHECK-LABEL: {{^}}kernel1:
; CHECK: .amdhsa_next_free_vgpr 41
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
-define amdgpu_kernel void @kernel1() #0 {
+define amdgpu_kernel void @kernel1() noinline norecurse nounwind optnone {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
- call void @alias1() #2
+ call void @alias1() nounwind readnone willreturn
ret void
}
-define internal void @aliasee_vgpr32_sgpr76() #1 {
+define internal void @aliasee_vgpr32_sgpr76() noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" {
bb:
call void asm sideeffect "; clobber v26 ", "~{v26}"()
ret void
}
-attributes #0 = { noinline norecurse nounwind optnone }
-attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
-attributes #2 = { nounwind readnone willreturn }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index bb34ef1a15d2b9..9d2af04cb7bc4f 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -9,21 +9,17 @@
; CHECK-LABEL: {{^}}kernel2:
; CHECK: .amdhsa_next_free_vgpr 53
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
-define amdgpu_kernel void @kernel2() #0 {
+define amdgpu_kernel void @kernel2() noinline norecurse nounwind optnone {
bb:
- call void @alias2() #2
+ call void @alias2() nounwind readnone willreturn
ret void
}
-define internal void @aliasee_vgpr64_sgpr102() #1 {
+define internal void @aliasee_vgpr64_sgpr102() noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
ret void
}
-attributes #0 = { noinline norecurse nounwind optnone }
-attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
-attributes #2 = { nounwind readnone willreturn }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index 8a88eb7e51ad72..d1b69562f39eca 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -9,21 +9,17 @@
; CHECK-LABEL: {{^}}kernel3:
; CHECK: .amdhsa_next_free_vgpr 253
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
-define amdgpu_kernel void @kernel3() #0 {
+define amdgpu_kernel void @kernel3() noinline norecurse nounwind optnone {
bb:
- call void @alias3() #2
+ call void @alias3() nounwind readnone willreturn
ret void
}
-define internal void @aliasee_vgpr256_sgpr102() #1 {
+define internal void @aliasee_vgpr256_sgpr102() noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" {
bb:
call void asm sideeffect "; clobber v252 ", "~{v252}"()
ret void
}
-attributes #0 = { noinline norecurse nounwind optnone }
-attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
-attributes #2 = { nounwind readnone willreturn }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 8766303d7ee6ec..6abda864c06e02 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -2,38 +2,38 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
-declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
-declare hidden void @external_void_func_i16_inreg(i32 inreg) #0
-declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
-declare hidden void @external_void_func_i64_inreg(i64 inreg) #0
-declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
-declare hidden void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
-declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
-declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
-declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
-declare hidden void @external_void_func_f16_inreg(half inreg) #0
-declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
-declare hidden void @external_void_func_f32_inreg(float inreg) #0
-declare hidden void @external_void_func_f64_inreg(double inreg) #0
-declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
-declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
-declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
-declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
+declare hidden void @external_void_func_i8_inreg(i8 inreg) nounwind
+declare hidden void @external_void_func_i16_inreg(i32 inreg) nounwind
+declare hidden void @external_void_func_i32_inreg(i32 inreg) nounwind
+declare hidden void @external_void_func_i64_inreg(i64 inreg) nounwind
+declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) nounwind
+declare hidden void @external_void_func_v3i32_inreg(<3 x i32> inreg) nounwind
+declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) nounwind
+declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) nounwind
+declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) nounwind
+declare hidden void @external_void_func_f16_inreg(half inreg) nounwind
+declare hidden void @external_void_func_bf16_inreg(bfloat inreg) nounwind
+declare hidden void @external_void_func_f32_inreg(float inreg) nounwind
+declare hidden void @external_void_func_f64_inreg(double inreg) nounwind
+declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) nounwind
+declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) nounwind
+declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) nounwind
+declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) nounwind
-declare hidden void @external_void_func_p0_inreg(ptr inreg) #0
-declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) #0
-declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) #0
-declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) #0
-declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) #0
+declare hidden void @external_void_func_p0_inreg(ptr inreg) nounwind
+declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) nounwind
+declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) nounwind
+declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) nounwind
+declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) nounwind
-declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) #0
+declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) nounwind
-declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
-declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) #0
-declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) #1
+declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) nounwind
+declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) nounwind
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg, i32 inreg) nounwind
+declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([15 x i32] inreg, i32 inreg) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z"
-define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
+define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +94,7 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
+define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -155,7 +155,7 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
+define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
+define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -277,7 +277,7 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
+define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -338,7 +338,7 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
+define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -400,7 +400,7 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
+define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -463,7 +463,7 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
+define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -535,12 +535,12 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
}
; FIXME:
-; define void @test_call_external_void_func_v16i32_inreg(<16 x i32> inreg %arg) #0 {
+; define void @test_call_external_void_func_v16i32_inreg(<16 x i32> inreg %arg) nounwind {
; call void @external_void_func_v16i32_inreg(<16 x i32> inreg %arg)
; ret void
; }
-define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
+define void @test_call_external_void_func_f16_inreg(half inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -601,7 +601,7 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
+define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -662,7 +662,7 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
+define void @test_call_external_void_func_f32_inreg(float inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -723,7 +723,7 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
+define void @test_call_external_void_func_f64_inreg(double inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -784,7 +784,7 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -846,7 +846,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
}
-define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
+define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -907,7 +907,7 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
ret void
}
-define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -968,7 +968,7 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
ret void
}
-define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 {
+define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v4f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1029,7 +1029,7 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
ret void
}
-define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
+define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_p0_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1090,7 +1090,7 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
ret void
}
-define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 {
+define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_p1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1151,7 +1151,7 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
ret void
}
-define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) #0 {
+define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_p3_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1212,7 +1212,7 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
ret void
}
-define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 {
+define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2p1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1275,7 +1275,7 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
ret void
}
-define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 {
+define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2p5_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1336,7 +1336,7 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
ret void
}
-define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) #0 {
+define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) nounwind {
; GFX9-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1402,7 +1402,7 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre
ret void
}
-define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #0 {
+define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) nounwind {
; GFX9-LABEL: test_call_external_void_func_a15i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1488,20 +1488,20 @@ define void @test_call_external_void_func_a15i32_inreg([15 x i32] inreg %arg0) #
}
; FIXME:
-; define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) #0 {
+; define void @test_call_external_void_func_a16i32_inreg([16 x i32] inreg %arg0) nounwind {
; call void @external_void_func_a16i32_inreg([16 x i32] inreg %arg0)
; ret void
; }
; FIXME:
-; define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #0 {
+; define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) nounwind {
; call void @external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1)
; ret void
; }
; FIXME: This should also fail
-define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) #1 {
+define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inreg %arg0, i32 inreg %arg1) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" {
; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1576,8 +1576,5 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([15 x i32] inre
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 725c2d71ac5e35..2402a1712b7522 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -5,62 +5,62 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
-declare hidden void @external_void_func_i1(i1) #0
-declare hidden void @external_void_func_i1_signext(i1 signext) #0
-declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
+declare hidden void @external_void_func_i1(i1) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i1_signext(i1 signext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i1_zeroext(i1 zeroext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_i8(i8) #0
-declare hidden void @external_void_func_i8_signext(i8 signext) #0
-declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
+declare hidden void @external_void_func_i8(i8) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i8_signext(i8 signext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i8_zeroext(i8 zeroext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_i16(i16) #0
-declare hidden void @external_void_func_i16_signext(i16 signext) #0
-declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
+declare hidden void @external_void_func_i16(i16) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i16_signext(i16 signext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i16_zeroext(i16 zeroext) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_i32(i32) #0
-declare hidden void @external_void_func_i64(i64) #0
-declare hidden void @external_void_func_v2i64(<2 x i64>) #0
-declare hidden void @external_void_func_v3i64(<3 x i64>) #0
-declare hidden void @external_void_func_v4i64(<4 x i64>) #0
+declare hidden void @external_void_func_i32(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_i64(i64) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v2i64(<2 x i64>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3i64(<3 x i64>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v4i64(<4 x i64>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_f16(half) #0
-declare hidden void @external_void_func_f32(float) #0
-declare hidden void @external_void_func_f64(double) #0
-declare hidden void @external_void_func_v2f32(<2 x float>) #0
-declare hidden void @external_void_func_v2f64(<2 x double>) #0
-declare hidden void @external_void_func_v3f32(<3 x float>) #0
-declare hidden void @external_void_func_v3f64(<3 x double>) #0
-declare hidden void @external_void_func_v5f32(<5 x float>) #0
+declare hidden void @external_void_func_f16(half) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_f32(float) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_f64(double) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v2f32(<2 x float>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v2f64(<2 x double>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3f32(<3 x float>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3f64(<3 x double>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v5f32(<5 x float>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_v2i16(<2 x i16>) #0
-declare hidden void @external_void_func_v2f16(<2 x half>) #0
-declare hidden void @external_void_func_v3i16(<3 x i16>) #0
-declare hidden void @external_void_func_v3f16(<3 x half>) #0
-declare hidden void @external_void_func_v4i16(<4 x i16>) #0
-declare hidden void @external_void_func_v4f16(<4 x half>) #0
+declare hidden void @external_void_func_v2i16(<2 x i16>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v2f16(<2 x half>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3i16(<3 x i16>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3f16(<3 x half>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v4i16(<4 x i16>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v4f16(<4 x half>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_v2i32(<2 x i32>) #0
-declare hidden void @external_void_func_v3i32(<3 x i32>) #0
-declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
-declare hidden void @external_void_func_v4i32(<4 x i32>) #0
-declare hidden void @external_void_func_v5i32(<5 x i32>) #0
-declare hidden void @external_void_func_v8i32(<8 x i32>) #0
-declare hidden void @external_void_func_v16i32(<16 x i32>) #0
-declare hidden void @external_void_func_v32i32(<32 x i32>) #0
-declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
+declare hidden void @external_void_func_v2i32(<2 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3i32(<3 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v4i32(<4 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v5i32(<5 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v8i32(<8 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v16i32(<16 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v32i32(<32 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; return value and argument
-declare hidden i32 @external_i32_func_i32(i32) #0
+declare hidden i32 @external_i32_func_i32(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; Structs
-declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
-declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-declare hidden void @external_void_func_v16i8(<16 x i8>) #0
+declare hidden void @external_void_func_v16i8(<16 x i8>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; FIXME: Should be passing -1
-define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i1_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -144,7 +144,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i1_signext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -249,7 +249,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
}
; FIXME: load should be scheduled before getpc
-define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i1_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -353,7 +353,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i8_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -438,7 +438,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
}
; FIXME: don't wait before call
-define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i8_signext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -538,7 +538,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i8_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -638,7 +638,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i16_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -722,7 +722,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i16_signext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -822,7 +822,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i16_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -922,7 +922,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1006,7 +1006,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_i64_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_i64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1094,7 +1094,7 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i64() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2i64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1296,7 +1296,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i64() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3i64:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1412,7 +1412,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i64() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v4i64:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1536,7 +1536,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f16_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_f16_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1620,7 +1620,7 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_f32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1704,7 +1704,7 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2f32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1792,7 +1792,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3f32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1885,7 +1885,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v5f32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_f64_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_f64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2075,7 +2075,7 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2f64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2172,7 +2172,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3f64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2278,7 +2278,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i16() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2i16:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -2375,7 +2375,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i16() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3i16:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -2474,7 +2474,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f16() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3f16:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -2574,7 +2574,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3i16_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2663,7 +2663,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3f16_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i16() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v4i16:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -2853,7 +2853,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v4i16_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -2944,7 +2944,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2f16() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -3043,7 +3043,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -3138,7 +3138,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v2i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3226,7 +3226,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3319,7 +3319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v3i32_i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3416,7 +3416,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v4i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s3, 0xf000
@@ -3511,7 +3511,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v4i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3608,7 +3608,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v5i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3710,7 +3710,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v8i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v8i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3822,7 +3822,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v8i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -3937,7 +3937,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v16i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v16i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4059,7 +4059,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v32i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4210,7 +4210,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v32i32_i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4378,7 +4378,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_i32_func_i32_imm:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
@@ -4488,7 +4488,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_struct_i8_i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4600,7 +4600,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
+define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4731,7 +4731,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -4924,7 +4924,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
ret void
}
-define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
+define amdgpu_kernel void @test_call_external_void_func_v16i8() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: test_call_external_void_func_v16i8:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
@@ -5122,7 +5122,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
ret void
}
-define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
@@ -5395,7 +5395,7 @@ entry:
ret void
}
-define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
+define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: tail_call_byval_align16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5483,7 +5483,7 @@ entry:
ret void
}
-define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5571,7 +5571,7 @@ entry:
ret void
}
-define void @stack_12xv3i32() #0 {
+define void @stack_12xv3i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: stack_12xv3i32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5910,7 +5910,7 @@ entry:
ret void
}
-define void @stack_12xv3f32() #0 {
+define void @stack_12xv3f32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: stack_12xv3f32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6253,7 +6253,7 @@ entry:
ret void
}
-define void @stack_8xv5i32() #0 {
+define void @stack_8xv5i32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: stack_8xv5i32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6625,7 +6625,7 @@ entry:
ret void
}
-define void @stack_8xv5f32() #0 {
+define void @stack_8xv5f32() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; VI-LABEL: stack_8xv5f32:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7000,20 +7000,16 @@ entry:
ret void
}
-declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
-declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
+declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden void @stack_passed_f64_arg(<32 x i32>, double) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
- <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
+ <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
- <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
+ <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
- <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
+ <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
- <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
+ <5 x float>, <5 x float>, <5 x float>, <5 x float>) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index f1992d71eb1de8..d4ea88f4eab4cd 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -5,7 +5,7 @@
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
; GCN: s_swappc_b64
-define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
+define amdgpu_kernel void @test_bitcast_return_type_noinline() nounwind noinline {
%val = call float @ret_i32_noinline()
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) undef
@@ -14,7 +14,7 @@ define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
; GCN: s_swappc_b64
-define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
+define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() nounwind noinline {
%val = call float @ret_i32_alwaysinline()
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) undef
@@ -26,7 +26,7 @@ define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
-define amdgpu_kernel void @test_bitcast_argument_type() #0 {
+define amdgpu_kernel void @test_bitcast_argument_type() nounwind noinline {
%val = call i32 @ident_i32(float 2.0)
%op = add i32 %val, 1
store volatile i32 %op, ptr addrspace(1) undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @test_bitcast_argument_type() #0 {
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
-define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
+define amdgpu_kernel void @test_bitcast_argument_and_return_types() nounwind noinline {
%val = call float @ident_i32(float 2.0)
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) undef
@@ -50,7 +50,7 @@ define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
; GCN-NEXT: v_and_b32_e32 [[TMP:v[0-9]+]], 0x3ff, v31
; GCN-NEXT: v_add_i32_e32 v0, vcc, [[TMP]], v0
; GCN-NEXT: s_setpc_b64
-define hidden i32 @use_workitem_id_x(i32 %arg0) #0 {
+define hidden i32 @use_workitem_id_x(i32 %arg0) nounwind noinline {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%op = add i32 %id, %arg0
ret i32 %op
@@ -64,7 +64,7 @@ define hidden i32 @use_workitem_id_x(i32 %arg0) #0 {
; GCN: v_mov_b32_e32 v0, 9
; GCN: s_swappc_b64
; GCN: v_add_f32_e32
-define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
+define amdgpu_kernel void @test_bitcast_use_workitem_id_x() nounwind noinline {
%val = call float @use_workitem_id_x(i32 9)
%op = fadd float %val, 1.0
store volatile float %op, ptr addrspace(1) undef
@@ -78,7 +78,7 @@ define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
; GCN: s_swappc_b64
@_ZTIi = external global ptr
declare i32 @__gxx_personality_v0(...)
-define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v0 {
+define amdgpu_kernel void @test_invoke() nounwind noinline personality ptr @__gxx_personality_v0 {
%val = invoke float @ident_i32(float 2.0)
to label %continue unwind label %broken
@@ -95,20 +95,16 @@ continue:
; Callees appears last in source file to test that we still lower their
; arguments before we lower any calls to them.
-define hidden i32 @ret_i32_noinline() #0 {
+define hidden i32 @ret_i32_noinline() nounwind noinline {
ret i32 4
}
-define hidden i32 @ret_i32_alwaysinline() #1 {
+define hidden i32 @ret_i32_alwaysinline() alwaysinline nounwind {
ret i32 4
}
-define hidden i32 @ident_i32(i32 %i) #0 {
+define hidden i32 @ident_i32(i32 %i) nounwind noinline {
ret i32 %i
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind noinline }
-attributes #1 = { alwaysinline nounwind }
-attributes #2 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/call-encoding.ll b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
index 8b61e4d2d375be..f172797b89310e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-encoding.ll
@@ -4,7 +4,7 @@
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
-define void @void_func_void() #1 {
+define void @void_func_void() nounwind noinline {
ret void
}
@@ -14,6 +14,3 @@ define amdgpu_kernel void @test_call_void_func_void() {
call void @void_func_void()
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index ed418070ecb506..d618d218a0ad1a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -9,8 +9,8 @@
; GCN-LABEL: {{^}}use_vcc:
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 0
-define void @use_vcc() #1 {
- call void asm sideeffect "", "~{vcc}" () #0
+define void @use_vcc() nounwind noinline norecurse {
+ call void asm sideeffect "", "~{vcc}" () nounwind noinline norecurse
ret void
}
@@ -27,7 +27,7 @@ define void @use_vcc() #1 {
; GCN: s_setpc_b64 s[30:31]
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 41
-define void @indirect_use_vcc() #1 {
+define void @indirect_use_vcc() nounwind noinline norecurse {
call void @use_vcc()
ret void
}
@@ -37,7 +37,7 @@ define void @indirect_use_vcc() #1 {
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
-define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) nounwind noinline norecurse {
call void @indirect_use_vcc()
ret void
}
@@ -46,8 +46,8 @@ define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out)
; CI: ; NumSgprs: 36
; VI: ; NumSgprs: 38
; GCN: ; NumVgprs: 0
-define void @use_flat_scratch() #1 {
- call void asm sideeffect "", "~{flat_scratch}" () #0
+define void @use_flat_scratch() nounwind noinline norecurse {
+ call void asm sideeffect "", "~{flat_scratch}" () nounwind noinline norecurse
ret void
}
@@ -55,7 +55,7 @@ define void @use_flat_scratch() #1 {
; CI: ; NumSgprs: 38
; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 41
-define void @indirect_use_flat_scratch() #1 {
+define void @indirect_use_flat_scratch() nounwind noinline norecurse {
call void @use_flat_scratch()
ret void
}
@@ -65,57 +65,57 @@ define void @indirect_use_flat_scratch() #1 {
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
-define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) nounwind noinline norecurse {
call void @indirect_use_flat_scratch()
ret void
}
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
-define void @use_10_vgpr() #1 {
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
- call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
+define void @use_10_vgpr() nounwind noinline norecurse {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() nounwind noinline norecurse
+ call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() nounwind noinline norecurse
ret void
}
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
-define void @indirect_use_10_vgpr() #0 {
+define void @indirect_use_10_vgpr() nounwind noinline norecurse {
call void @use_10_vgpr()
ret void
}
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: ; NumVgprs: 41
-define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
+define amdgpu_kernel void @indirect_2_level_use_10_vgpr() nounwind noinline norecurse {
call void @indirect_use_10_vgpr()
ret void
}
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
-define void @use_50_vgpr() #1 {
- call void asm sideeffect "", "~{v49}"() #0
+define void @use_50_vgpr() nounwind noinline norecurse {
+ call void asm sideeffect "", "~{v49}"() nounwind noinline norecurse
ret void
}
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
-define void @indirect_use_50_vgpr() #0 {
+define void @indirect_use_50_vgpr() nounwind noinline norecurse {
call void @use_50_vgpr()
ret void
}
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; NumSgprs: 80
-define void @use_80_sgpr() #1 {
- call void asm sideeffect "", "~{s79}"() #0
+define void @use_80_sgpr() nounwind noinline norecurse {
+ call void asm sideeffect "", "~{s79}"() nounwind noinline norecurse
ret void
}
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; NumSgprs: 82
-define void @indirect_use_80_sgpr() #1 {
+define void @indirect_use_80_sgpr() nounwind noinline norecurse {
call void @use_80_sgpr()
ret void
}
@@ -124,7 +124,7 @@ define void @indirect_use_80_sgpr() #1 {
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
-define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
+define amdgpu_kernel void @indirect_2_level_use_80_sgpr() nounwind noinline norecurse {
call void @indirect_use_80_sgpr()
ret void
}
@@ -132,32 +132,32 @@ define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
-define void @use_stack0() #1 {
+define void @use_stack0() nounwind noinline norecurse {
%alloca = alloca [512 x i32], align 4, addrspace(5)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) nounwind noinline norecurse
ret void
}
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
-define void @use_stack1() #1 {
+define void @use_stack1() nounwind noinline norecurse {
%alloca = alloca [100 x i32], align 4, addrspace(5)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) nounwind noinline norecurse
ret void
}
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
-define void @indirect_use_stack() #1 {
+define void @indirect_use_stack() nounwind noinline norecurse {
%alloca = alloca [16 x i32], align 4, addrspace(5)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) nounwind noinline norecurse
call void @use_stack0()
ret void
}
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: ScratchSize: 2132
-define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
+define amdgpu_kernel void @indirect_2_level_use_stack() nounwind noinline norecurse {
call void @indirect_use_stack()
ret void
}
@@ -166,14 +166,14 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: ScratchSize: 2052
-define amdgpu_kernel void @multi_call_use_use_stack() #0 {
+define amdgpu_kernel void @multi_call_use_use_stack() nounwind noinline norecurse {
call void @use_stack0()
call void @use_stack1()
ret void
}
-declare void @external() #0
+declare void @external() nounwind noinline norecurse
; GCN-LABEL: {{^}}usage_external:
; NumSgprs: 48
@@ -182,12 +182,12 @@ declare void @external() #0
;
; GCN-V5-LABEL: {{^}}usage_external:
; GCN-V5: ScratchSize: 0
-define amdgpu_kernel void @usage_external() #0 {
+define amdgpu_kernel void @usage_external() nounwind noinline norecurse {
call void @external()
ret void
}
-declare void @external_recurse() #2
+declare void @external_recurse() nounwind noinline
; GCN-LABEL: {{^}}usage_external_recurse:
; NumSgprs: 48
@@ -196,7 +196,7 @@ declare void @external_recurse() #2
;
; GCN-V5-LABEL: {{^}}usage_external_recurse:
; GCN-V5: ScratchSize: 0
-define amdgpu_kernel void @usage_external_recurse() #0 {
+define amdgpu_kernel void @usage_external_recurse() nounwind noinline norecurse {
call void @external_recurse()
ret void
}
@@ -206,9 +206,9 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
;
; GCN-V5-LABEL: {{^}}direct_recursion_use_stack:
; GCN-V5: ScratchSize: 2064{{$}}
-define void @direct_recursion_use_stack(i32 %val) #2 {
+define void @direct_recursion_use_stack(i32 %val) nounwind noinline {
%alloca = alloca [512 x i32], align 4, addrspace(5)
- call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) nounwind noinline norecurse
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %ret, label %call
@@ -226,7 +226,7 @@ ret:
;
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
-define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
+define amdgpu_kernel void @usage_direct_recursion(i32 %n) nounwind noinline norecurse {
call void @direct_recursion_use_stack(i32 %n)
ret void
}
@@ -240,7 +240,7 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
- tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
+ tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) nounwind noinline norecurse
call void @external()
ret void
}
@@ -254,7 +254,7 @@ entry:
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
- tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
+ tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) nounwind noinline norecurse
call void @external()
ret void
}
@@ -268,14 +268,10 @@ entry:
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
- tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
+ tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) nounwind noinline norecurse
call void @external()
ret void
}
-attributes #0 = { nounwind noinline norecurse }
-attributes #1 = { nounwind noinline norecurse }
-attributes #2 = { nounwind noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index c7f9ff85806fc6..1e9020485f5105 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
-declare hidden void @external_void_func_void() #3
+declare hidden void @external_void_func_void() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_getpc_b64 s[34:35]
@@ -14,9 +14,9 @@ declare hidden void @external_void_func_void() #3
; GCN-NEXT: #ASMSTART
; GCN-NEXT: #ASMEND
; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35]
-define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() nounwind {
call void @external_void_func_void()
- call void asm sideeffect "", ""() #0
+ call void asm sideeffect "", ""() nounwind
call void @external_void_func_void()
ret void
}
@@ -47,9 +47,9 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; FLATSCR: scratch_load_dword
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64 s[30:31]
-define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() nounwind {
call void @external_void_func_void()
- call void asm sideeffect "", ""() #0
+ call void asm sideeffect "", ""() nounwind
call void @external_void_func_void()
ret void
}
@@ -70,7 +70,7 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
; MUBUF: buffer_load_dword v40
; FLATSCR: scratch_load_dword v40
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
-define void @test_func_call_external_void_funcx2() #0 {
+define void @test_func_call_external_void_funcx2() nounwind {
call void @external_void_func_void()
call void @external_void_func_void()
ret void
@@ -86,8 +86,8 @@ define void @test_func_call_external_void_funcx2() #0 {
; GCN: v_readlane_b32 s31, v0, 1
; GCN: v_readlane_b32 s30, v0, 0
; GCN: s_setpc_b64 s[30:31]
-define void @void_func_void_clobber_s30_s31() #2 {
- call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
+define void @void_func_void_clobber_s30_s31() nounwind noinline {
+ call void asm sideeffect "; clobber", "~{s[30:31]}"() nounwind
ret void
}
@@ -96,8 +96,8 @@ define void @void_func_void_clobber_s30_s31() #2 {
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
-define hidden void @void_func_void_clobber_vcc() #2 {
- call void asm sideeffect "", "~{vcc}"() #0
+define hidden void @void_func_void_clobber_vcc() nounwind noinline {
+ call void asm sideeffect "", "~{vcc}"() nounwind
ret void
}
@@ -108,7 +108,7 @@ define hidden void @void_func_void_clobber_vcc() #2 {
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_swappc_b64
; GCN: s_mov_b64 vcc, s[34:35]
-define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) nounwind {
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
call void @void_func_void_clobber_vcc()
%val0 = load volatile i32, ptr addrspace(1) undef
@@ -121,7 +121,7 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1)
; GCN: s_mov_b32 s33, s31
; GCN: s_swappc_b64
; GCN-NEXT: s_mov_b32 s31, s33
-define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) nounwind {
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
@@ -132,7 +132,7 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace
; GCN: v_mov_b32_e32 v40, v31
; GCN: s_swappc_b64
; GCN-NEXT: v_mov_b32_e32 v31, v40
-define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) nounwind {
%v31 = call i32 asm sideeffect "; def $0", "={v31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
@@ -158,7 +158,7 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace
; GCN-NEXT: ;;#ASMEND
; GCN-NOT: s33
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) nounwind {
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
@@ -191,7 +191,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(
; GCN-NEXT: ; use s34
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) nounwind {
%s34 = call i32 asm sideeffect "; def $0", "={s34}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
@@ -223,7 +223,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(
; GCN-NEXT: ; use v40
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) nounwind {
%v40 = call i32 asm sideeffect "; def $0", "={v40}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
@@ -237,8 +237,8 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_v40(ptr addrspace(
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s33, v0, 0
; GCN: s_setpc_b64
-define hidden void @void_func_void_clobber_s33() #2 {
- call void asm sideeffect "; clobber", "~{s33}"() #0
+define hidden void @void_func_void_clobber_s33() nounwind noinline {
+ call void asm sideeffect "; clobber", "~{s33}"() nounwind
ret void
}
@@ -249,8 +249,8 @@ define hidden void @void_func_void_clobber_s33() #2 {
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s34, v0, 0
; GCN: s_setpc_b64
-define hidden void @void_func_void_clobber_s34() #2 {
- call void asm sideeffect "; clobber", "~{s34}"() #0
+define hidden void @void_func_void_clobber_s34() nounwind noinline {
+ call void asm sideeffect "; clobber", "~{s34}"() nounwind
ret void
}
@@ -261,7 +261,7 @@ define hidden void @void_func_void_clobber_s34() #2 {
; GCN-NEXT: s_addc_u32
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
+define amdgpu_kernel void @test_call_void_func_void_clobber_s33() nounwind {
call void @void_func_void_clobber_s33()
ret void
}
@@ -273,7 +273,7 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
; GCN-NEXT: s_addc_u32
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
+define amdgpu_kernel void @test_call_void_func_void_clobber_s34() nounwind {
call void @void_func_void_clobber_s34()
ret void
}
@@ -287,10 +287,10 @@ define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v40
; GCN-NOT: s40
-define void @callee_saved_sgpr_func() #2 {
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
+define void @callee_saved_sgpr_func() nounwind noinline {
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
call void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
ret void
}
@@ -302,10 +302,10 @@ define void @callee_saved_sgpr_func() #2 {
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
-define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
+define amdgpu_kernel void @callee_saved_sgpr_kernel() nounwind noinline {
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
call void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
ret void
}
@@ -319,12 +319,12 @@ define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
; GCN-NOT: s40
; GCN: v_readlane_b32 s40, v41
; GCN-NOT: s40
-define void @callee_saved_sgpr_vgpr_func() #2 {
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
- %v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0
+define void @callee_saved_sgpr_vgpr_func() nounwind noinline {
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
+ %v40 = call i32 asm sideeffect "; def v40", "={v40}"() nounwind
call void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
- call void asm sideeffect "; use $0", "v"(i32 %v40) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
+ call void asm sideeffect "; use $0", "v"(i32 %v40) nounwind
ret void
}
@@ -336,16 +336,11 @@ define void @callee_saved_sgpr_vgpr_func() #2 {
; GCN-NOT: s40
; GCN: ; use s40
; GCN-NOT: s40
-define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
- %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
+define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() nounwind noinline {
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
+ %v32 = call i32 asm sideeffect "; def v32", "={v32}"() nounwind
call void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
- call void asm sideeffect "; use $0", "v"(i32 %v32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
+ call void asm sideeffect "; use $0", "v"(i32 %v32) nounwind
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
-attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
index c62a0824591050..65d5d86c4347bb 100644
--- a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
@@ -5,7 +5,7 @@
; Check for optimizing the passed implicit workitem ID based on the
; required group size. This should avoid a few bit packing operations.
-declare hidden void @callee() #0
+declare hidden void @callee() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
define amdgpu_kernel void @known_x_0(ptr addrspace(1) %out) !reqd_work_group_size !0 {
; CHECK-LABEL: known_x_0:
@@ -123,8 +123,6 @@ define amdgpu_kernel void @known_xyz_0(ptr addrspace(1) %out) !reqd_work_group_s
}
; CHECK: .amdhsa_system_vgpr_workitem_id 0
-attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
-
!0 = !{i32 1, i32 64, i32 64}
!1 = !{i32 64, i32 1, i32 64}
!2 = !{i32 64, i32 64, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/call-return-types.ll b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
index 2a71095f7a2060..095d5f08508d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/call-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-return-types.ll
@@ -3,69 +3,69 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-declare void @external_void_func_void() #0
-
-declare i1 @external_i1_func_void() #0
-declare zeroext i1 @external_i1_zeroext_func_void() #0
-declare signext i1 @external_i1_signext_func_void() #0
-
-declare i8 @external_i8_func_void() #0
-declare zeroext i8 @external_i8_zeroext_func_void() #0
-declare signext i8 @external_i8_signext_func_void() #0
-
-declare i16 @external_i16_func_void() #0
-declare <2 x i16> @external_v2i16_func_void() #0
-declare <4 x i16> @external_v4i16_func_void() #0
-declare zeroext i16 @external_i16_zeroext_func_void() #0
-declare signext i16 @external_i16_signext_func_void() #0
-
-declare i32 @external_i32_func_void() #0
-declare i64 @external_i64_func_void() #0
-declare half @external_f16_func_void() #0
-declare float @external_f32_func_void() #0
-declare double @external_f64_func_void() #0
-
-declare <2 x half> @external_v2f16_func_void() #0
-declare <4 x half> @external_v4f16_func_void() #0
-declare <3 x float> @external_v3f32_func_void() #0
-declare <5 x float> @external_v5f32_func_void() #0
-declare <2 x double> @external_v2f64_func_void() #0
-
-declare <2 x i24> @external_v2i24_func_void() #0
-
-declare <2 x i32> @external_v2i32_func_void() #0
-declare <3 x i32> @external_v3i32_func_void() #0
-declare <4 x i32> @external_v4i32_func_void() #0
-declare <5 x i32> @external_v5i32_func_void() #0
-declare <8 x i32> @external_v8i32_func_void() #0
-declare <16 x i32> @external_v16i32_func_void() #0
-declare <32 x i32> @external_v32i32_func_void() #0
-declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0
-
-declare { i32, i64 } @external_i32_i64_func_void() #0
+declare void @external_void_func_void() nounwind
+
+declare i1 @external_i1_func_void() nounwind
+declare zeroext i1 @external_i1_zeroext_func_void() nounwind
+declare signext i1 @external_i1_signext_func_void() nounwind
+
+declare i8 @external_i8_func_void() nounwind
+declare zeroext i8 @external_i8_zeroext_func_void() nounwind
+declare signext i8 @external_i8_signext_func_void() nounwind
+
+declare i16 @external_i16_func_void() nounwind
+declare <2 x i16> @external_v2i16_func_void() nounwind
+declare <4 x i16> @external_v4i16_func_void() nounwind
+declare zeroext i16 @external_i16_zeroext_func_void() nounwind
+declare signext i16 @external_i16_signext_func_void() nounwind
+
+declare i32 @external_i32_func_void() nounwind
+declare i64 @external_i64_func_void() nounwind
+declare half @external_f16_func_void() nounwind
+declare float @external_f32_func_void() nounwind
+declare double @external_f64_func_void() nounwind
+
+declare <2 x half> @external_v2f16_func_void() nounwind
+declare <4 x half> @external_v4f16_func_void() nounwind
+declare <3 x float> @external_v3f32_func_void() nounwind
+declare <5 x float> @external_v5f32_func_void() nounwind
+declare <2 x double> @external_v2f64_func_void() nounwind
+
+declare <2 x i24> @external_v2i24_func_void() nounwind
+
+declare <2 x i32> @external_v2i32_func_void() nounwind
+declare <3 x i32> @external_v3i32_func_void() nounwind
+declare <4 x i32> @external_v4i32_func_void() nounwind
+declare <5 x i32> @external_v5i32_func_void() nounwind
+declare <8 x i32> @external_v8i32_func_void() nounwind
+declare <16 x i32> @external_v16i32_func_void() nounwind
+declare <32 x i32> @external_v32i32_func_void() nounwind
+declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() nounwind
+
+declare { i32, i64 } @external_i32_i64_func_void() nounwind
; GCN-LABEL: {{^}}test_call_external_void_func_void:
-define amdgpu_kernel void @test_call_external_void_func_void() #0 {
+define amdgpu_kernel void @test_call_external_void_func_void() nounwind {
call void @external_void_func_void()
ret void
}
; GCN-LABEL: {{^}}test_call_external_void_func_void_x2:
-define amdgpu_kernel void @test_call_external_void_func_void_x2() #0 {
+define amdgpu_kernel void @test_call_external_void_func_void_x2() nounwind {
call void @external_void_func_void()
call void @external_void_func_void()
ret void
}
; GCN-LABEL: {{^}}test_call_external_i1_func_void:
-define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_func_void() nounwind {
%val = call i1 @external_i1_func_void()
store volatile i1 %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_i1_zeroext_func_void:
-define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() nounwind {
%val = call i1 @external_i1_zeroext_func_void()
%val.ext = zext i1 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -73,7 +73,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i1_signext_func_void:
-define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i1_signext_func_void() nounwind {
%val = call i1 @external_i1_signext_func_void()
%val.ext = zext i1 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -81,14 +81,14 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i8_func_void:
-define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_func_void() nounwind {
%val = call i8 @external_i8_func_void()
store volatile i8 %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_i8_zeroext_func_void:
-define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() nounwind {
%val = call i8 @external_i8_zeroext_func_void()
%val.ext = zext i8 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -96,7 +96,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i8_signext_func_void:
-define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i8_signext_func_void() nounwind {
%val = call i8 @external_i8_signext_func_void()
%val.ext = zext i8 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -104,14 +104,14 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i16_func_void:
-define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_func_void() nounwind {
%val = call i16 @external_i16_func_void()
store volatile i16 %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_i16_zeroext_func_void:
-define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() nounwind {
%val = call i16 @external_i16_zeroext_func_void()
%val.ext = zext i16 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -119,7 +119,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i16_signext_func_void:
-define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i16_signext_func_void() nounwind {
%val = call i16 @external_i16_signext_func_void()
%val.ext = zext i16 %val to i32
store volatile i32 %val.ext, ptr addrspace(1) undef
@@ -127,49 +127,49 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
}
; GCN-LABEL: {{^}}test_call_external_i32_func_void:
-define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i32_func_void() nounwind {
%val = call i32 @external_i32_func_void()
store volatile i32 %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_i64_func_void:
-define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i64_func_void() nounwind {
%val = call i64 @external_i64_func_void()
store volatile i64 %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_f16_func_void:
-define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f16_func_void() nounwind {
%val = call half @external_f16_func_void()
store volatile half %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_f32_func_void:
-define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f32_func_void() nounwind {
%val = call float @external_f32_func_void()
store volatile float %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_f64_func_void:
-define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_f64_func_void() nounwind {
%val = call double @external_f64_func_void()
store volatile double %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_v2f64_func_void:
-define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2f64_func_void() nounwind {
%val = call <2 x double> @external_v2f64_func_void()
store volatile <2 x double> %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_v2i32_func_void:
-define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2i32_func_void() nounwind {
%val = call <2 x i32> @external_v2i32_func_void()
store volatile <2 x i32> %val, ptr addrspace(1) undef
ret void
@@ -180,14 +180,14 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
; GFX89-DAG: buffer_store_dwordx3 v[0:2]
; GFX11-DAG: buffer_store_b96 v[0:2]
-define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3i32_func_void() nounwind {
%val = call <3 x i32> @external_v3i32_func_void()
store volatile <3 x i32> %val, ptr addrspace(1) undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_call_external_v4i32_func_void:
-define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4i32_func_void() nounwind {
%val = call <4 x i32> @external_v4i32_func_void()
store volatile <4 x i32> %val, ptr addrspace(1) undef, align 8
ret void
@@ -201,56 +201,56 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GFX89-DAG: buffer_store_dword v4
; GFX11-DAG: buffer_store_b128 v[0:3]
; GFX11-DAG: buffer_store_b32 v4
-define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v5i32_func_void() nounwind {
%val = call <5 x i32> @external_v5i32_func_void()
store volatile <5 x i32> %val, ptr addrspace(1) undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_call_external_v8i32_func_void:
-define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v8i32_func_void() nounwind {
%val = call <8 x i32> @external_v8i32_func_void()
store volatile <8 x i32> %val, ptr addrspace(1) undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_call_external_v16i32_func_void:
-define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v16i32_func_void() nounwind {
%val = call <16 x i32> @external_v16i32_func_void()
store volatile <16 x i32> %val, ptr addrspace(1) undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_call_external_v32i32_func_void:
-define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v32i32_func_void() nounwind {
%val = call <32 x i32> @external_v32i32_func_void()
store volatile <32 x i32> %val, ptr addrspace(1) undef, align 8
ret void
}
; GCN-LABEL: {{^}}test_call_external_v2i16_func_void:
-define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2i16_func_void() nounwind {
%val = call <2 x i16> @external_v2i16_func_void()
store volatile <2 x i16> %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_v4i16_func_void:
-define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4i16_func_void() nounwind {
%val = call <4 x i16> @external_v4i16_func_void()
store volatile <4 x i16> %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_v2f16_func_void:
-define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2f16_func_void() nounwind {
%val = call <2 x half> @external_v2f16_func_void()
store volatile <2 x half> %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_v4f16_func_void:
-define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v4f16_func_void() nounwind {
%val = call <4 x half> @external_v4f16_func_void()
store volatile <4 x half> %val, ptr addrspace(1) undef
ret void
@@ -259,7 +259,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-LABEL: {{^}}test_call_external_v2i24_func_void:
; GCN: s_swappc_b64
; GCN: v_add_{{(nc_)?}}{{i|u}}32_e32 v0, {{(vcc, )?}}v0, v1
-define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v2i24_func_void() nounwind {
%val = call <2 x i24> @external_v2i24_func_void()
%elt0 = extractelement <2 x i24> %val, i32 0
%elt1 = extractelement <2 x i24> %val, i32 1
@@ -273,7 +273,7 @@ define amdgpu_kernel void @test_call_external_v2i24_func_void() #0 {
; GFX7-DAG: flat_store_dwordx3 {{.*}}, v[0:2]
; GFX89-DAG: buffer_store_dwordx3 v[0:2]
; GFX11-DAG: buffer_store_b96 v[0:2]
-define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v3f32_func_void() nounwind {
%val = call <3 x float> @external_v3f32_func_void()
store volatile <3 x float> %val, ptr addrspace(1) undef
ret void
@@ -287,14 +287,14 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GFX89-DAG: buffer_store_dword v4
; GFX11-DAG: buffer_store_b128 v[0:3]
; GFX11-DAG: buffer_store_b32 v4
-define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v5f32_func_void() nounwind {
%val = call <5 x float> @external_v5f32_func_void()
store volatile <5 x float> %val, ptr addrspace(1) undef
ret void
}
; GCN-LABEL: {{^}}test_call_external_i32_i64_func_void:
-define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
+define amdgpu_kernel void @test_call_external_i32_i64_func_void() nounwind {
%val = call { i32, i64 } @external_i32_i64_func_void()
%val.0 = extractvalue { i32, i64 } %val, 0
%val.1 = extractvalue { i32, i64 } %val, 1
@@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; Requires writing results to stack
; GCN-LABEL: {{^}}test_call_external_v32i32_i32_func_void:
-define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
+define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() nounwind {
%val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void()
%val0 = extractvalue { <32 x i32>, i32 } %val, 0
%val1 = extractvalue { <32 x i32>, i32 } %val, 1
@@ -313,7 +313,3 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
store volatile i32 %val1, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index ea2bba1673a0b0..26ef3d0748816b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -2,7 +2,7 @@
; A call should be skipped if all lanes are zero, since we don't know
; what side effects should be avoided inside the call.
-define hidden void @func() #1 {
+define hidden void @func() nounwind noinline {
ret void
}
@@ -11,7 +11,7 @@ define hidden void @func() #1 {
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
; GCN: s_swappc_b64
; GCN: [[END]]:
-define void @if_call(i32 %flag) #0 {
+define void @if_call(i32 %flag) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %call, label %end
@@ -28,7 +28,7 @@ end:
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
; GCN: ; sample asm
; GCN: [[END]]:
-define void @if_asm(i32 %flag) #0 {
+define void @if_asm(i32 %flag) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %call, label %end
@@ -44,7 +44,7 @@ end:
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_cbranch_execz .LBB3_2
; GCN: s_swappc_b64
-define amdgpu_kernel void @if_call_kernel() #0 {
+define amdgpu_kernel void @if_call_kernel() nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%cc = icmp eq i32 %id, 0
br i1 %cc, label %call, label %end
@@ -57,8 +57,4 @@ end:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
-attributes #2 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll b/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
index da7385475088b8..46b6ad3ca46ba2 100644
--- a/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
@@ -2,7 +2,7 @@
; FIXME: It should be invalid IR to have a call to a kernel, but this
; is currently relied on, but should be eliminated before codegen.
-define amdgpu_kernel void @callee_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @callee_kernel(ptr addrspace(1) %out) nounwind noinline {
entry:
store volatile i32 0, ptr addrspace(1) %out
ret void
@@ -11,10 +11,8 @@ entry:
; Make sure there's no crash when the callsite calling convention
; doesn't match.
; CHECK: LLVM ERROR: invalid call to entry function
-define amdgpu_kernel void @caller_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @caller_kernel(ptr addrspace(1) %out) nounwind noinline {
entry:
call void @callee_kernel(ptr addrspace(1) %out)
ret void
}
-
-attributes #0 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll b/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll
index 1f4f6471fcdba1..9e2e530a50c463 100644
--- a/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll
@@ -2,17 +2,15 @@
; FIXME: It should be invalid IR to have a call to a kernel, but this
; is currently relied on, but should be eliminated before codegen.
-define amdgpu_kernel void @callee_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @callee_kernel(ptr addrspace(1) %out) nounwind noinline {
entry:
store volatile i32 0, ptr addrspace(1) %out
ret void
}
; CHECK: LLVM ERROR: Unsupported calling convention for call
-define amdgpu_kernel void @caller_kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @caller_kernel(ptr addrspace(1) %out) nounwind noinline {
entry:
call amdgpu_kernel void @callee_kernel(ptr addrspace(1) %out)
ret void
}
-
-attributes #0 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 60f2dc1ce414d0..389a3b1b13533c 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Load argument depends on waitcnt which should be skipped.
-define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
+define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: call_memory_arg_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
@@ -25,7 +25,7 @@ define amdgpu_kernel void @call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
}
; Memory waitcnt with no register dependence on the call
-define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
+define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: call_memory_no_dep:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
@@ -49,7 +49,7 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
}
; Should not wait after the call before memory
-define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #0 {
+define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: call_no_wait_after_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9
@@ -71,7 +71,7 @@ define amdgpu_kernel void @call_no_wait_after_call(ptr addrspace(1) %ptr, i32) #
ret void
}
-define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) #0 {
+define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: call_no_wait_after_call_return_val:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9
@@ -94,7 +94,7 @@ define amdgpu_kernel void @call_no_wait_after_call_return_val(ptr addrspace(1) %
}
; Need to wait for the address dependency
-define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 {
+define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: call_got_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9
@@ -115,7 +115,7 @@ define amdgpu_kernel void @call_got_load(ptr addrspace(1) %ptr, i32) #0 {
}
; Need to wait for the address dependency
-define void @tailcall_got_load(ptr addrspace(1) %ptr, i32) #0 {
+define void @tailcall_got_load(ptr addrspace(1) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: tailcall_got_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -131,7 +131,7 @@ define void @tailcall_got_load(ptr addrspace(1) %ptr, i32) #0 {
}
; No need to wait for the load.
-define void @tail_call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
+define void @tail_call_memory_arg_load(ptr addrspace(3) %ptr, i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; GCN-LABEL: tail_call_memory_arg_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -145,11 +145,9 @@ define void @tail_call_memory_arg_load(ptr addrspace(3) %ptr, i32) #0 {
ret void
}
-declare hidden void @func(i32) #0
-declare hidden i32 @func.return(i32) #0
-declare void @got.func(i32) #0
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+declare hidden void @func(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare hidden i32 @func.return(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
+declare void @got.func(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir b/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
index b6dc75db3edc13..bab6534fe29ba2 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir
@@ -5,14 +5,12 @@
# that the return address is not clobbered in the callee by the outstanding load.
--- |
- define amdgpu_kernel void @call_waw_waitcnt() #0 {
+ define amdgpu_kernel void @call_waw_waitcnt() nounwind {
%1 = call i32 @func()
ret void
}
- declare hidden i32 @func() #0
-
- attributes #0 = { nounwind }
+ declare hidden i32 @func() nounwind
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index e926a3c728cbd2..98d8f655e5d3a1 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -6,7 +6,7 @@
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @callee_no_stack() #0 {
+define void @callee_no_stack() nounwind {
ret void
}
@@ -18,7 +18,7 @@ define void @callee_no_stack() #0 {
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
-define void @callee_no_stack_no_fp_elim_all() #1 {
+define void @callee_no_stack_no_fp_elim_all() nounwind "frame-pointer"="all" {
ret void
}
@@ -26,7 +26,7 @@ define void @callee_no_stack_no_fp_elim_all() #1 {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
+define void @callee_no_stack_no_fp_elim_nonleaf() nounwind "frame-pointer"="non-leaf" {
ret void
}
@@ -38,7 +38,7 @@ define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
; FLATSCR-NEXT: scratch_store_dword off, v0, s32
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @callee_with_stack() #0 {
+define void @callee_with_stack() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
@@ -62,7 +62,7 @@ define void @callee_with_stack() #0 {
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
-define void @callee_with_stack_no_fp_elim_all() #1 {
+define void @callee_with_stack_no_fp_elim_all() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
@@ -76,7 +76,7 @@ define void @callee_with_stack_no_fp_elim_all() #1 {
; FLATSCR-NEXT: scratch_store_dword off, v0, s32{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
+define void @callee_with_stack_no_fp_elim_non_leaf() nounwind "frame-pointer"="non-leaf" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
@@ -117,7 +117,7 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @callee_with_stack_and_call() #0 {
+define void @callee_with_stack_and_call() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void @external_void_func_void()
@@ -159,12 +159,12 @@ define void @callee_with_stack_and_call() #0 {
; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @callee_no_stack_with_call() #0 {
+define void @callee_no_stack_with_call() nounwind {
call void @external_void_func_void()
ret void
}
-declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_void() nounwind
; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and
; restored. No FP is required.
@@ -187,26 +187,26 @@ declare hidden void @external_void_func_void() #0
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
- call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
- call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
- call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
- call void asm sideeffect "", "~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #0
-
- %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
-
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
+define void @callee_func_sgpr_spill_no_calls(i32 %in) nounwind {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() nounwind
+ call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() nounwind
+ call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() nounwind
+ call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
+ call void asm sideeffect "", "~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() nounwind
+
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) nounwind
ret void
}
@@ -260,7 +260,7 @@ define void @spill_only_csr_sgpr() {
; FLATSCR-NEXT: s_mov_b32 s33, s0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
-define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
+define void @callee_with_stack_no_fp_elim_csr_vgpr() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void asm sideeffect "; clobber v41", "~{v41}"()
@@ -295,7 +295,7 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
; GCN-NEXT: s_mov_b32 s33, [[TMP_SGPR]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
-define void @last_lane_vgpr_for_fp_csr() #1 {
+define void @last_lane_vgpr_for_fp_csr() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void asm sideeffect "; clobber v41", "~{v41}"()
@@ -306,7 +306,7 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
- ,~{s100},~{s101},~{s102}"() #1
+ ,~{s100},~{s101},~{s102}"() nounwind "frame-pointer"="all"
ret void
}
@@ -342,7 +342,7 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
-define void @no_new_vgpr_for_fp_csr() #1 {
+define void @no_new_vgpr_for_fp_csr() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void asm sideeffect "; clobber v41", "~{v41}"()
@@ -353,7 +353,7 @@ define void @no_new_vgpr_for_fp_csr() #1 {
,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
- ,~{s100},~{s101},~{s102}"() #1
+ ,~{s100},~{s101},~{s102}"() nounwind "frame-pointer"="all"
ret void
}
@@ -378,7 +378,7 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
-define void @realign_stack_no_fp_elim() #1 {
+define void @realign_stack_no_fp_elim() nounwind "frame-pointer"="all" {
%alloca = alloca i32, align 8192, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
ret void
@@ -412,7 +412,7 @@ define void @realign_stack_no_fp_elim() #1 {
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @no_unused_non_csr_sgpr_for_fp() #1 {
+define void @no_unused_non_csr_sgpr_for_fp() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -421,7 +421,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
"~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
- ,~{s30},~{s31}"() #0
+ ,~{s30},~{s31}"() nounwind
ret void
}
@@ -451,7 +451,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
+define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -460,13 +460,13 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
"~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
- ,~{s30},~{s31}"() #0
+ ,~{s30},~{s31}"() nounwind
call void asm sideeffect "; clobber nonpreserved initial VGPRs",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
- ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1
+ ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() nounwind "frame-pointer"="all"
ret void
}
@@ -500,7 +500,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; GCN-NEXT: s_mov_b32 s33, vcc_lo
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) #1 {
+define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) nounwind "frame-pointer"="all" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -509,14 +509,14 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
"~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
- ,~{s30},~{s31}"() #0
+ ,~{s30},~{s31}"() nounwind
; Use all clobberable VGPRs, so a CSR spill is needed for the VGPR
call void asm sideeffect "; clobber nonpreserved VGPRs",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
- ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1
+ ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() nounwind "frame-pointer"="all"
ret void
}
@@ -524,7 +524,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; GCN-LABEL: {{^}}local_empty_func:
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define internal void @local_empty_func() #0 {
+define internal void @local_empty_func() nounwind {
ret void
}
@@ -541,7 +541,7 @@ define internal void @local_empty_func() #0 {
; MUBUF: s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN: s_mov_b32 s33, [[TMP_SGPR]]
-define void @ipra_call_with_stack() #0 {
+define void @ipra_call_with_stack() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
call void @local_empty_func()
@@ -563,7 +563,7 @@ define void @ipra_call_with_stack() #0 {
; GCN: s_setpc_b64
; MUBUF: ScratchSize: 8
; FLATSCR: ScratchSize: 0
-define void @callee_need_to_spill_fp_to_memory() #3 {
+define void @callee_need_to_spill_fp_to_memory() nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" {
call void asm sideeffect "; clobber nonpreserved SGPRs",
"~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
@@ -597,7 +597,7 @@ define void @callee_need_to_spill_fp_to_memory() #3 {
; MUBUF: s_mov_b64 exec, [[COPY_EXEC2]]
; MUBUF: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64
-define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
+define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" {
call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
"~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
@@ -632,7 +632,7 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
; GCN-NOT: v_readlane_b32 s33, v40
; FLATSCR: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64
-define void @callee_need_to_spill_fp_to_reg() #1 {
+define void @callee_need_to_spill_fp_to_reg() nounwind "frame-pointer"="all" {
call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
"~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
@@ -668,7 +668,7 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; FLATSCR: v_mov_b32_e32 v0, 0
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1000
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
-define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) #3 {
+define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) align 4 %arg) nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -691,8 +691,3 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38}"()
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "frame-pointer"="all" }
-attributes #2 = { nounwind "frame-pointer"="non-leaf" }
-attributes #3 = { nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index b711542be5a7fc..812d60d7371730 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -3,8 +3,8 @@
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
-define hidden void @use_dispatch_ptr() #1 {
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+define hidden void @use_dispatch_ptr() nounwind noinline {
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
%value = load volatile i32, ptr addrspace(4) %dispatch_ptr
ret void
}
@@ -14,15 +14,15 @@ define hidden void @use_dispatch_ptr() #1 {
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
-define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
+define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) nounwind noinline {
call void @use_dispatch_ptr()
ret void
}
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
-define hidden void @use_queue_ptr() #1 {
- %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+define hidden void @use_queue_ptr() nounwind noinline {
+ %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
%value = load volatile i32, ptr addrspace(4) %queue_ptr
ret void
}
@@ -30,7 +30,7 @@ define hidden void @use_queue_ptr() #1 {
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
+define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) nounwind noinline {
call void @use_queue_ptr()
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]
; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
-define hidden void @use_queue_ptr_addrspacecast() #1 {
+define hidden void @use_queue_ptr_addrspacecast() nounwind noinline {
%asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
store volatile i32 0, ptr %asc
ret void
@@ -57,7 +57,7 @@ define hidden void @use_queue_ptr_addrspacecast() #1 {
; GFX9-NOT: s_mov_b64 s[6:7]
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
-define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
+define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) nounwind noinline {
call void @use_queue_ptr_addrspacecast()
ret void
}
@@ -66,30 +66,30 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
-define hidden void @use_kernarg_segment_ptr() #1 {
- %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
+define hidden void @use_kernarg_segment_ptr() nounwind noinline {
+ %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable
%value = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr
ret void
}
; GCN-LABEL: {{^}}use_implicitarg_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
-define hidden void @use_implicitarg_ptr() #1 {
- %implicit.arg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
+define hidden void @use_implicitarg_ptr() nounwind noinline {
+ %implicit.arg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
%value = load volatile i32, ptr addrspace(4) %implicit.arg.ptr
ret void
}
; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
+define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) nounwind noinline {
call void @use_kernarg_segment_ptr()
ret void
}
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
-define hidden void @use_dispatch_id() #1 {
+define hidden void @use_dispatch_id() nounwind noinline {
%id = call i64 @llvm.amdgcn.dispatch.id()
call void asm sideeffect "; use $0", "s"(i64 %id)
ret void
@@ -101,7 +101,7 @@ define hidden void @use_dispatch_id() #1 {
; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: s_mov_b64 s[10:11], s[4:5]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
-define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
+define amdgpu_kernel void @kern_indirect_use_dispatch_id() nounwind noinline {
call void @use_dispatch_id()
ret void
}
@@ -109,7 +109,7 @@ define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
-define hidden void @use_workgroup_id_x() #1 {
+define hidden void @use_workgroup_id_x() nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
call void asm sideeffect "; use $0", "s"(i32 %val)
ret void
@@ -121,7 +121,7 @@ define hidden void @use_workgroup_id_x() #1 {
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s12
; GCN: s_setpc_b64
-define hidden void @use_stack_workgroup_id_x() #1 {
+define hidden void @use_stack_workgroup_id_x() nounwind noinline {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
%val = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -132,7 +132,7 @@ define hidden void @use_stack_workgroup_id_x() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s13
-define hidden void @use_workgroup_id_y() #1 {
+define hidden void @use_workgroup_id_y() nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
call void asm sideeffect "; use $0", "s"(i32 %val)
ret void
@@ -141,7 +141,7 @@ define hidden void @use_workgroup_id_y() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s14
-define hidden void @use_workgroup_id_z() #1 {
+define hidden void @use_workgroup_id_z() nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
call void asm sideeffect "; use $0", "s"(i32 %val)
ret void
@@ -150,7 +150,7 @@ define hidden void @use_workgroup_id_z() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s12
; GCN: ; use s13
-define hidden void @use_workgroup_id_xy() #1 {
+define hidden void @use_workgroup_id_xy() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
call void asm sideeffect "; use $0", "s"(i32 %val0)
@@ -162,7 +162,7 @@ define hidden void @use_workgroup_id_xy() #1 {
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
-define hidden void @use_workgroup_id_xyz() #1 {
+define hidden void @use_workgroup_id_xyz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
%val2 = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -175,7 +175,7 @@ define hidden void @use_workgroup_id_xyz() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s12
; GCN: ; use s14
-define hidden void @use_workgroup_id_xz() #1 {
+define hidden void @use_workgroup_id_xz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
call void asm sideeffect "; use $0", "s"(i32 %val0)
@@ -186,7 +186,7 @@ define hidden void @use_workgroup_id_xz() #1 {
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s13
; GCN: ; use s14
-define hidden void @use_workgroup_id_yz() #1 {
+define hidden void @use_workgroup_id_yz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
call void asm sideeffect "; use $0", "s"(i32 %val0)
@@ -207,7 +207,7 @@ define hidden void @use_workgroup_id_yz() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() nounwind noinline {
call void @use_workgroup_id_x()
ret void
}
@@ -222,7 +222,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() nounwind noinline {
call void @use_workgroup_id_y()
ret void
}
@@ -240,7 +240,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() nounwind noinline {
call void @use_workgroup_id_z()
ret void
}
@@ -257,7 +257,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() nounwind noinline {
call void @use_workgroup_id_xy()
ret void
}
@@ -272,7 +272,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() nounwind noinline {
call void @use_workgroup_id_xyz()
ret void
}
@@ -290,7 +290,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() nounwind noinline {
call void @use_workgroup_id_xz()
ret void
}
@@ -306,7 +306,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
-define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() nounwind noinline {
call void @use_workgroup_id_yz()
ret void
}
@@ -317,7 +317,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s30, v40, 0
-define hidden void @func_indirect_use_workgroup_id_x() #1 {
+define hidden void @func_indirect_use_workgroup_id_x() nounwind noinline {
call void @use_workgroup_id_x()
ret void
}
@@ -328,7 +328,7 @@ define hidden void @func_indirect_use_workgroup_id_x() #1 {
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
-define hidden void @func_indirect_use_workgroup_id_y() #1 {
+define hidden void @func_indirect_use_workgroup_id_y() nounwind noinline {
call void @use_workgroup_id_y()
ret void
}
@@ -337,7 +337,7 @@ define hidden void @func_indirect_use_workgroup_id_y() #1 {
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
-define hidden void @func_indirect_use_workgroup_id_z() #1 {
+define hidden void @func_indirect_use_workgroup_id_z() nounwind noinline {
call void @use_workgroup_id_z()
ret void
}
@@ -346,7 +346,7 @@ define hidden void @func_indirect_use_workgroup_id_z() #1 {
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s12
-define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
+define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %arg0, ptr addrspace(1) undef
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -357,7 +357,7 @@ define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s13
-define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
+define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %arg0, ptr addrspace(1) undef
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -368,7 +368,7 @@ define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s14
-define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
+define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %arg0, ptr addrspace(1) undef
call void asm sideeffect "; use $0", "s"(i32 %val)
@@ -390,7 +390,7 @@ define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() nounwind noinline {
call void @other_arg_use_workgroup_id_x(i32 555)
ret void
}
@@ -405,7 +405,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() nounwind noinline {
call void @other_arg_use_workgroup_id_y(i32 555)
ret void
}
@@ -420,7 +420,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
-define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() nounwind noinline {
call void @other_arg_use_workgroup_id_z(i32 555)
ret void
}
@@ -434,17 +434,17 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
-define hidden void @use_every_sgpr_input() #1 {
+define hidden void @use_every_sgpr_input() nounwind noinline {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
%val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr
- %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
%val1 = load volatile i32, ptr addrspace(4) %queue_ptr
- %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
+ %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
%val2 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
%val3 = call i64 @llvm.amdgcn.dispatch.id()
@@ -479,7 +479,7 @@ define hidden void @use_every_sgpr_input() #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
+define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) nounwind noinline {
call void @use_every_sgpr_input()
ret void
}
@@ -504,7 +504,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
+define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() nounwind noinline "amdgpu-implicitarg-num-bytes"="0" {
call void @use_every_sgpr_input()
ret void
}
@@ -524,7 +524,7 @@ define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
; GCN-NOT: s[12:13]
; GCN-NOT: s14
; GCN: s_or_saveexec_b64 s[16:17], -1
-define hidden void @func_indirect_use_every_sgpr_input() #1 {
+define hidden void @func_indirect_use_every_sgpr_input() nounwind noinline {
call void @use_every_sgpr_input()
ret void
}
@@ -539,17 +539,17 @@ define hidden void @func_indirect_use_every_sgpr_input() #1 {
; GCN: ; use s14
; GCN: s_swappc_b64
-define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
+define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() nounwind noinline {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
%val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr
- %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
%val1 = load volatile i32, ptr addrspace(4) %queue_ptr
- %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
+ %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
%val2 = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr
%val3 = call i64 @llvm.amdgcn.dispatch.id()
@@ -568,18 +568,14 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind noinline }
-attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
+declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable
+declare noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable
+declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
+declare i64 @llvm.amdgcn.dispatch.id() nounwind readnone speculatable
+declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
index 1d2523d364e550..41596281c4e2c2 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -7,7 +7,7 @@
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_x() #1 {
+define void @use_workitem_id_x() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -19,7 +19,7 @@ define void @use_workitem_id_x() #1 {
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_y() #1 {
+define void @use_workitem_id_y() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -31,7 +31,7 @@ define void @use_workitem_id_y() #1 {
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_z() #1 {
+define void @use_workitem_id_z() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -45,7 +45,7 @@ define void @use_workitem_id_z() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xy() #1 {
+define void @use_workitem_id_xy() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -63,7 +63,7 @@ define void @use_workitem_id_xy() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xyz() #1 {
+define void @use_workitem_id_xyz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -81,7 +81,7 @@ define void @use_workitem_id_xyz() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xz() #1 {
+define void @use_workitem_id_xz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -97,7 +97,7 @@ define void @use_workitem_id_xz() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_yz() #1 {
+define void @use_workitem_id_yz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -111,7 +111,7 @@ define void @use_workitem_id_yz() #1 {
; GCN-NOT: v31
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_x() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_x()
ret void
}
@@ -128,7 +128,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_y() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_y()
ret void
}
@@ -145,7 +145,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_z() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_z()
ret void
}
@@ -159,7 +159,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_xy()
ret void
}
@@ -174,7 +174,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_xz()
ret void
}
@@ -189,7 +189,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_yz()
ret void
}
@@ -209,7 +209,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_xyz()
ret void
}
@@ -218,7 +218,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_x() #1 {
+define void @func_indirect_use_workitem_id_x() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_x()
ret void
}
@@ -227,7 +227,7 @@ define void @func_indirect_use_workitem_id_x() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_y() #1 {
+define void @func_indirect_use_workitem_id_y() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_y()
ret void
}
@@ -236,7 +236,7 @@ define void @func_indirect_use_workitem_id_y() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_z() #1 {
+define void @func_indirect_use_workitem_id_z() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @use_workitem_id_z()
ret void
}
@@ -246,7 +246,7 @@ define void @func_indirect_use_workitem_id_z() #1 {
; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_x(i32 %arg0) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -258,7 +258,7 @@ define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_y(i32 %arg0) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -270,7 +270,7 @@ define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_z(i32 %arg0) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -285,7 +285,7 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @other_arg_use_workitem_id_x(i32 555)
ret void
}
@@ -302,7 +302,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
; GCN-NOT: v0
; GCN: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}
@@ -316,7 +316,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
; GCN-NOT: v0
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @other_arg_use_workitem_id_z(i32 555)
ret void
}
@@ -331,7 +331,7 @@ define void @too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
@@ -382,7 +382,7 @@ define void @too_many_args_use_workitem_id_x(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -402,7 +402,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
; GCN-NOT: v31
; GCN: s_swappc_b64
; GCN-NOT: v31
-define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
+define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
store volatile i32 %arg0, ptr addrspace(1) undef
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
@@ -433,7 +433,7 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
@@ -458,7 +458,7 @@ define void @too_many_args_use_workitem_id_x_byval(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
@@ -521,7 +521,7 @@ define void @too_many_args_use_workitem_id_x_byval(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -545,7 +545,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
-define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
+define void @func_call_too_many_args_use_workitem_id_x_byval() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -586,7 +586,7 @@ define void @too_many_args_use_workitem_id_xyz(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val0, ptr addrspace(1) undef
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -651,7 +651,7 @@ define void @too_many_args_use_workitem_id_xyz(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -684,7 +684,7 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val0, ptr addrspace(1) undef
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -743,7 +743,7 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -756,9 +756,6 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
-attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512"
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512"
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512"
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 5e6f377da28e15..4f52fbcbe377f2 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -6,7 +6,7 @@
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_x() #1 {
+define void @use_workitem_id_x() nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -18,7 +18,7 @@ define void @use_workitem_id_x() #1 {
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_y() #1 {
+define void @use_workitem_id_y() nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -30,7 +30,7 @@ define void @use_workitem_id_y() #1 {
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_z() #1 {
+define void @use_workitem_id_z() nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val, ptr addrspace(1) undef
ret void
@@ -45,7 +45,7 @@ define void @use_workitem_id_z() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xy() #1 {
+define void @use_workitem_id_xy() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -66,7 +66,7 @@ define void @use_workitem_id_xy() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xyz() #1 {
+define void @use_workitem_id_xyz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
@@ -85,7 +85,7 @@ define void @use_workitem_id_xyz() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_xz() #1 {
+define void @use_workitem_id_xz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -102,7 +102,7 @@ define void @use_workitem_id_xz() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @use_workitem_id_yz() #1 {
+define void @use_workitem_id_yz() nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, ptr addrspace(1) undef
@@ -121,7 +121,7 @@ define void @use_workitem_id_yz() #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_x() nounwind noinline {
call void @use_workitem_id_x()
ret void
}
@@ -139,7 +139,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_y() nounwind noinline {
call void @use_workitem_id_y()
ret void
}
@@ -155,7 +155,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_z() nounwind noinline {
call void @use_workitem_id_z()
ret void
}
@@ -171,7 +171,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() nounwind noinline {
call void @use_workitem_id_xy()
ret void
}
@@ -187,7 +187,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() nounwind noinline {
call void @use_workitem_id_xz()
ret void
}
@@ -204,7 +204,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() nounwind noinline {
call void @use_workitem_id_yz()
ret void
}
@@ -216,7 +216,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
+define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() nounwind noinline {
call void @use_workitem_id_xyz()
ret void
}
@@ -225,7 +225,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_x() #1 {
+define void @func_indirect_use_workitem_id_x() nounwind noinline {
call void @use_workitem_id_x()
ret void
}
@@ -234,7 +234,7 @@ define void @func_indirect_use_workitem_id_x() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_y() #1 {
+define void @func_indirect_use_workitem_id_y() nounwind noinline {
call void @use_workitem_id_y()
ret void
}
@@ -243,7 +243,7 @@ define void @func_indirect_use_workitem_id_y() #1 {
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
-define void @func_indirect_use_workitem_id_z() #1 {
+define void @func_indirect_use_workitem_id_z() nounwind noinline {
call void @use_workitem_id_z()
ret void
}
@@ -254,7 +254,7 @@ define void @func_indirect_use_workitem_id_z() #1 {
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_x(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -266,7 +266,7 @@ define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_y(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -278,7 +278,7 @@ define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
-define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
+define void @other_arg_use_workitem_id_z(i32 %arg0) nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i32 %val, ptr addrspace(1) undef
@@ -295,7 +295,7 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() nounwind noinline {
call void @other_arg_use_workitem_id_x(i32 555)
ret void
}
@@ -310,7 +310,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() nounwind noinline {
call void @other_arg_use_workitem_id_y(i32 555)
ret void
}
@@ -324,7 +324,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
+define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() nounwind noinline {
call void @other_arg_use_workitem_id_z(i32 555)
ret void
}
@@ -336,7 +336,7 @@ define void @too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
@@ -392,7 +392,7 @@ define void @too_many_args_use_workitem_id_x(
; FIXEDABI: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() nounwind noinline {
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -415,7 +415,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
; FIXEDABI-NOT: v31
; GCN: s_swappc_b64
-define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
+define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) nounwind noinline {
store volatile i32 %arg0, ptr addrspace(1) undef
call void @too_many_args_use_workitem_id_x(
i32 10, i32 20, i32 30, i32 40,
@@ -446,7 +446,7 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline {
call void @too_many_args_use_workitem_id_x(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
@@ -473,7 +473,7 @@ define void @too_many_args_use_workitem_id_x_byval(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, ptr addrspace(5) byval(i32) %arg32) nounwind noinline {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val, ptr addrspace(1) undef
@@ -540,7 +540,7 @@ define void @too_many_args_use_workitem_id_x_byval(
; FIXEDABI-NOT: s32
; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32
; FIXEDABI: s_swappc_b64
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() nounwind noinline {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -568,7 +568,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
; FIXED-ABI-NOT: v31
; FIXEDABI: s_swappc_b64
-define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
+define void @func_call_too_many_args_use_workitem_id_x_byval() nounwind noinline {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 999, ptr addrspace(5) %alloca
call void @too_many_args_use_workitem_id_x_byval(
@@ -598,7 +598,7 @@ define void @too_many_args_use_workitem_id_xyz(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val0, ptr addrspace(1) undef
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -660,7 +660,7 @@ define void @too_many_args_use_workitem_id_xyz(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() nounwind noinline {
call void @too_many_args_use_workitem_id_xyz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -690,7 +690,7 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
- i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
+ i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) nounwind noinline {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %val0, ptr addrspace(1) undef
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -748,7 +748,7 @@ define void @too_many_args_use_workitem_id_x_stack_yz(
; GCN: s_swappc_b64
; GCN: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
+define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() nounwind noinline {
call void @too_many_args_use_workitem_id_x_stack_yz(
i32 10, i32 20, i32 30, i32 40,
i32 50, i32 60, i32 70, i32 80,
@@ -761,7 +761,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
ret void
}
-declare hidden void @extern_hint(i32) #2
+declare hidden void @extern_hint(i32) nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
; Workitem IDs should not be passed due to the attribute
; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
@@ -771,7 +771,7 @@ declare hidden void @extern_hint(i32) #2
; GCN-NOT: v0
; GCN-NOT: v31
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
+define amdgpu_kernel void @kern_call_no_workitem_id_hints() nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
call void @extern_hint(i32 9)
ret void
}
@@ -783,7 +783,7 @@ define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
; GCN-NOT: v0
; GCN-NOT: v31
; GCN: s_swappc_b64
-define void @func_call_no_workitem_id_hints() #2 {
+define void @func_call_no_workitem_id_hints() nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
call void @extern_hint(i32 9)
ret void
}
@@ -799,18 +799,14 @@ declare hidden void @extern_nohint(i32)
; GCN-NOT: v0
; GCN-NOT: v31
; GCN: s_swappc_b64
-define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
- call void @extern_nohint(i32 9) #2
+define amdgpu_kernel void @kern_callsite_workitem_id_hints() nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
+ call void @extern_nohint(i32 9) nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind noinline }
-attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 15ebdd70ae8818..1eb38ff3cb1463 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -91,7 +91,7 @@ define amdgpu_ps half @ps_ret_cc_inreg_f16(half inreg %arg0) {
ret half %add
}
-define fastcc float @fastcc(float %arg0) #0 {
+define fastcc float @fastcc(float %arg0) nounwind noinline {
; GCN-LABEL: fastcc:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -101,7 +101,7 @@ define fastcc float @fastcc(float %arg0) #0 {
ret float %add
}
-define coldcc float @coldcc(float %arg0) #0 {
+define coldcc float @coldcc(float %arg0) nounwind noinline {
; GCN-LABEL: coldcc:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -111,7 +111,7 @@ define coldcc float @coldcc(float %arg0) #0 {
ret float %add
}
-define amdgpu_kernel void @call_coldcc() #0 {
+define amdgpu_kernel void @call_coldcc() nounwind noinline {
; SI-LABEL: call_coldcc:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 s32, 0
@@ -173,7 +173,7 @@ define amdgpu_kernel void @call_coldcc() #0 {
ret void
}
-define amdgpu_kernel void @call_fastcc() #0 {
+define amdgpu_kernel void @call_fastcc() nounwind noinline {
; SI-LABEL: call_fastcc:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 s32, 0
@@ -3291,5 +3291,3 @@ define amdgpu_cs void @amdgpu_cs_i1_zext(i1 zeroext %arg0) {
store i1 %arg0, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 927e45f0294887..c9ef171736f689 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_fi_lifetime:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[FI]]
-define amdgpu_kernel void @store_fi_lifetime(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @store_fi_lifetime(ptr addrspace(1) %out, i32 %in) nounwind {
entry:
%b = alloca i8, addrspace(5)
call void @llvm.lifetime.start.p5(i64 1, ptr addrspace(5) %b)
@@ -18,7 +18,7 @@ entry:
; GCN: buffer_store_dword v{{[0-9]+}}, off,
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
-define amdgpu_kernel void @stored_fi_to_lds(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @stored_fi_to_lds(ptr addrspace(3) %ptr) nounwind {
%tmp = alloca float, addrspace(5)
store float 4.0, ptr addrspace(5) %tmp
store ptr addrspace(5) %tmp, ptr addrspace(3) %ptr
@@ -38,7 +38,7 @@ define amdgpu_kernel void @stored_fi_to_lds(ptr addrspace(3) %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
-define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(ptr addrspace(3) %ptr) nounwind {
%tmp0 = alloca float, addrspace(5)
%tmp1 = alloca float, addrspace(5)
store float 4.0, ptr addrspace(5) %tmp0
@@ -54,7 +54,7 @@ define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(ptr addrspace(3) %pt
; GCN: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_self() #0 {
+define amdgpu_kernel void @stored_fi_to_self() nounwind {
%tmp = alloca ptr addrspace(5), addrspace(5)
; Avoid optimizing everything out
@@ -72,7 +72,7 @@ define amdgpu_kernel void @stored_fi_to_self() #0 {
; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}}
; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2048{{$}}
-define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
+define amdgpu_kernel void @stored_fi_to_self_offset() nounwind {
%tmp0 = alloca [512 x i32], addrspace(5)
%tmp1 = alloca ptr addrspace(5), addrspace(5)
@@ -95,7 +95,7 @@ define amdgpu_kernel void @stored_fi_to_self_offset() #0 {
; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
-define amdgpu_kernel void @stored_fi_to_fi() #0 {
+define amdgpu_kernel void @stored_fi_to_fi() nounwind {
%tmp0 = alloca ptr addrspace(5), addrspace(5)
%tmp1 = alloca ptr addrspace(5), addrspace(5)
%tmp2 = alloca ptr addrspace(5), addrspace(5)
@@ -113,7 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[FI]]
-define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) nounwind {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
store ptr addrspace(5) %tmp, ptr addrspace(1) %ptr
@@ -131,7 +131,7 @@ define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1) %ptr) nounwind {
%tmp0 = alloca float, addrspace(5)
%tmp1 = alloca float, addrspace(5)
%tmp2 = alloca float, addrspace(5)
@@ -158,7 +158,7 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(ptr addrspace(1) %ptr) nounwind {
%tmp0 = alloca [4096 x i32], addrspace(5)
%tmp1 = alloca [4096 x i32], addrspace(5)
store volatile i32 0, ptr addrspace(5) %tmp0
@@ -180,7 +180,7 @@ define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(ptr addrspace(1
; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC_HI]], g1 at gotpcrel32@hi+12
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[FI]]
-define amdgpu_kernel void @cannot_select_assertzext_valuetype(ptr addrspace(1) %out, i32 %idx) #0 {
+define amdgpu_kernel void @cannot_select_assertzext_valuetype(ptr addrspace(1) %out, i32 %idx) nounwind {
entry:
%b = alloca i32, align 4, addrspace(5)
%tmp1 = load volatile ptr addrspace(5), ptr addrspace(1) @g1, align 4
@@ -190,8 +190,5 @@ entry:
ret void
}
-declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #1
-declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly nounwind }
+declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
+declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index 9a22635e880f17..7f9f259515d30b 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -407,7 +407,7 @@ entry:
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_I32
-define amdgpu_kernel void @suaddo32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @suaddo32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; CISI-LABEL: suaddo32:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -509,7 +509,7 @@ define amdgpu_kernel void @suaddo32(ptr addrspace(1) %out, ptr addrspace(1) %car
; because the only user of VCC produced by the UADDOis v_cndmask.
; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC
-define amdgpu_kernel void @uaddo32_vcc_user(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @uaddo32_vcc_user(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; CISI-LABEL: uaddo32_vcc_user:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -627,7 +627,7 @@ define amdgpu_kernel void @uaddo32_vcc_user(ptr addrspace(1) %out, ptr addrspace
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_U64_PSEUDO
-define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind {
; CISI-LABEL: suaddo64:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -764,7 +764,7 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_ADD_U64_PSEUDO
-define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 {
+define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) nounwind {
; CISI-LABEL: vuaddo64:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1293,7 +1293,7 @@ entry:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_I32
-define amdgpu_kernel void @susubo32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @susubo32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; CISI-LABEL: susubo32:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -1395,7 +1395,7 @@ define amdgpu_kernel void @susubo32(ptr addrspace(1) %out, ptr addrspace(1) %car
; because the only user of VCC produced by the USUBOis v_cndmask.
; We select to VALU form to avoid unnecessary s_cselect to copy SCC to VCC
-define amdgpu_kernel void @usubo32_vcc_user(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @usubo32_vcc_user(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; CISI-LABEL: usubo32_vcc_user:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1513,7 +1513,7 @@ define amdgpu_kernel void @usubo32_vcc_user(ptr addrspace(1) %out, ptr addrspace
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_U64_PSEUDO
-define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind {
; CISI-LABEL: susubo64:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -1650,7 +1650,7 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_SUB_U64_PSEUDO
-define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 {
+define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) nounwind {
; CISI-LABEL: vusubo64:
; CISI: ; %bb.0:
; CISI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2940,18 +2940,15 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
-declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
-declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN-ISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll
index c674aebabcc8d2..0045bae30215dd 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX1100 %s
-define amdgpu_kernel void @test_kern_empty() local_unnamed_addr #0 {
+define amdgpu_kernel void @test_kern_empty() local_unnamed_addr nounwind {
; GFX803-LABEL: test_kern_empty:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_endpgm
@@ -24,7 +24,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
+define amdgpu_kernel void @test_kern_stack() local_unnamed_addr nounwind {
; GFX803-LABEL: test_kern_stack:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s0, s0, s7
@@ -64,7 +64,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
+define amdgpu_kernel void @test_kern_call() local_unnamed_addr nounwind {
; GFX803-LABEL: test_kern_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s10, s10, s15
@@ -140,11 +140,11 @@ define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
; GFX1100-NEXT: s_endpgm
entry:
- tail call void @ex() #0
+ tail call void @ex() nounwind
ret void
}
-define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
+define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr nounwind {
; GFX803-LABEL: test_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s10, s10, s15
@@ -234,11 +234,11 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
entry:
%x = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %x, align 4
- tail call void @ex() #0
+ tail call void @ex() nounwind
ret void
}
-define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
+define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr nounwind "frame-pointer"="all" {
; GFX803-LABEL: test_force_fp_kern_empty:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_mov_b32 s33, 0
@@ -263,7 +263,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
+define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr nounwind "frame-pointer"="all" {
; GFX803-LABEL: test_force_fp_kern_stack:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s0, s0, s7
@@ -307,7 +307,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
+define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr nounwind "frame-pointer"="all" {
; GFX803-LABEL: test_force_fp_kern_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s10, s10, s15
@@ -405,11 +405,11 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1010-NEXT s_swappc_b64 s[30:31], s[18:19]
; GFX1010-NEXT s_endpgm
entry:
- tail call void @ex() #2
+ tail call void @ex() nounwind "frame-pointer"="all"
ret void
}
-define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
+define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr nounwind "frame-pointer"="all" {
; GFX803-LABEL: test_force_fp_kern_stack_and_call:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_i32 s10, s10, s15
@@ -502,11 +502,11 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
entry:
%x = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %x, align 4
- tail call void @ex() #2
+ tail call void @ex() nounwind "frame-pointer"="all"
ret void
}
-define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
+define amdgpu_kernel void @test_sgpr_offset_kernel() nounwind "amdgpu-num-vgpr"="8" {
; GFX803-LABEL: test_sgpr_offset_kernel:
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_add_u32 s0, s0, s7
@@ -588,11 +588,7 @@ entry:
ret void
}
-declare hidden void @ex() local_unnamed_addr #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
-attributes #2 = { nounwind "frame-pointer"="all" }
+declare hidden void @ex() local_unnamed_addr nounwind
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index fdae1696a5a492..a120c4f3b9549b 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -435,7 +435,7 @@ done:
ret void
}
-define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
+define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) nounwind {
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX7-NEXT: entry:
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
@@ -590,7 +590,7 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
%in.gep = getelementptr i8, ptr %in, i64 4095
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
@@ -608,7 +608,7 @@ done:
ret void
}
-define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
+define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) nounwind {
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
; OPT-NEXT: entry:
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
@@ -713,7 +713,7 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
entry:
%out.gep = getelementptr i32, ptr %out, i64 99999
%in.gep = getelementptr i8, ptr %in, i64 4096
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
@@ -731,7 +731,7 @@ done:
ret void
}
-define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
+define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) nounwind {
; OPT-LABEL: @test_sinkable_flat_reg_offset(
; OPT-NEXT: entry:
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
@@ -836,7 +836,7 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
%in.gep = getelementptr i8, ptr %in, i64 %reg
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
@@ -854,8 +854,4 @@ done:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind argmemonly }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 1588dde19cfb78..7be876933472fb 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -43,7 +43,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: global_store_dword v1, v0, s[0:1] offset:252
; GCN-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) argmemonly nounwind
%cmp = icmp eq i32 %tid, 0
br i1 %cmp, label %endif, label %if
@@ -62,9 +62,5 @@ done:
ret void
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone willreturn }
-attributes #2 = { argmemonly nounwind willreturn }
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) argmemonly nounwind
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index ac50fb86c96f7e..66e3240ccb95ba 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -45,7 +45,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: global_store_dword v1, v0, s[0:1] offset:2300
; GCN-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) argmemonly nounwind
%cmp = icmp eq i32 %tid, 0
br i1 %cmp, label %endif, label %if
@@ -65,9 +65,5 @@ done:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #2
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone willreturn }
-attributes #2 = { argmemonly nounwind willreturn }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone willreturn
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 49f9f695409b12..15fe6e631b2087 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test_sink_global_small_offset_i32(ptr addrspace(1) %o
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(1) %in, i64 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(ptr addrspac
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999
%in.gep = getelementptr i8, ptr addrspace(1) %in, i64 65535
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -82,7 +82,7 @@ define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024
%in.gep = getelementptr i8, ptr addrspace(1) %in, i64 4095
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -111,7 +111,7 @@ define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(ptr ad
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999
%in.gep = getelementptr i8, ptr addrspace(1) %in, i64 4096
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -146,7 +146,7 @@ entry:
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999
%add.arg = add i32 %arg, 8
%alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1022
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -186,7 +186,7 @@ entry:
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999
%add.arg = add i32 %arg, 8
%alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1023
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -223,7 +223,7 @@ entry:
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i64 999999
%add.arg = add i32 %arg, 8
%alloca.gep = getelementptr [512 x i32], ptr addrspace(5) %alloca, i32 0, i32 1024
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -253,7 +253,7 @@ entry:
%offset.ext = zext i32 %offset to i64
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(1) %in, i64 %offset.ext
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -282,7 +282,7 @@ define amdgpu_kernel void @test_sink_constant_small_offset_i32(ptr addrspace(1)
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -311,7 +311,7 @@ define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(ptr addrspace
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 255
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -344,7 +344,7 @@ define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(ptr addrsp
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 256
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -383,7 +383,7 @@ define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(ptr addrspac
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 4294967295
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -414,7 +414,7 @@ define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(ptr addrs
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 17179869181
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -444,7 +444,7 @@ define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(ptr add
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 262143
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -482,7 +482,7 @@ define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(ptr
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 999999
%in.gep = getelementptr i32, ptr addrspace(4) %in, i64 262144
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -538,7 +538,7 @@ define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_al
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024
%in.gep = getelementptr i8, ptr addrspace(4) %in, i64 4095
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -562,7 +562,7 @@ define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(ptr addrsp
entry:
%out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999
%in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -586,7 +586,7 @@ define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(ptr addrspac
entry:
%out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999
%in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -612,7 +612,7 @@ define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(ptr
entry:
%out.gep = getelementptr ptr addrspace(3), ptr addrspace(3) %out, i32 999999
%in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -646,7 +646,7 @@ define amdgpu_kernel void @test_sink_global_small_min_scratch_global_offset(ptr
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 1024
%in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4096
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -674,7 +674,7 @@ define amdgpu_kernel void @test_sink_global_small_min_scratch_global_neg1_offset
entry:
%out.gep = getelementptr i32, ptr addrspace(1) %out, i64 99999
%in.gep = getelementptr i8, ptr addrspace(1) %in, i64 -4097
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -699,7 +699,7 @@ define amdgpu_kernel void @test_sink_small_offset_ds_append(ptr addrspace(3) %ou
entry:
%out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999
%in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -723,7 +723,7 @@ define amdgpu_kernel void @test_sink_small_offset_ds_consume(ptr addrspace(3) %o
entry:
%out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999
%in.gep = getelementptr i32, ptr addrspace(3) %in, i32 7
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
@@ -740,11 +740,6 @@ done:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #3
-declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #3
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind argmemonly }
-attributes #3 = { argmemonly convergent nounwind willreturn }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) argmemonly convergent nounwind willreturn
+declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) argmemonly convergent nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
index 12ef7657b19130..5573ead744a3db 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -35,7 +35,7 @@
; GCN: buffer_store_dword
; GCN: s_endpgm
-define amdgpu_kernel void @sink_ubfe_i32(ptr addrspace(1) %out, i32 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i32(ptr addrspace(1) %out, i32 %arg1) nounwind {
entry:
%shr = lshr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
@@ -75,7 +75,7 @@ ret:
; OPT: ret
; GCN-LABEL: {{^}}sink_sbfe_i32:
-define amdgpu_kernel void @sink_sbfe_i32(ptr addrspace(1) %out, i32 %arg1) #0 {
+define amdgpu_kernel void @sink_sbfe_i32(ptr addrspace(1) %out, i32 %arg1) nounwind {
entry:
%shr = ashr i32 %arg1, 8
br i1 undef, label %bb0, label %bb1
@@ -132,7 +132,7 @@ ret:
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1) nounwind {
entry:
%shr = lshr i16 %arg1, 4
br i1 undef, label %bb0, label %bb1
@@ -183,7 +183,7 @@ ret:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(ptr addrspace(1) %out, i64 %arg1) nounwind {
entry:
%shr = lshr i64 %arg1, 30
br i1 undef, label %bb0, label %bb1
@@ -231,7 +231,7 @@ ret:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_low32(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_low32(ptr addrspace(1) %out, i64 %arg1) nounwind {
entry:
%shr = lshr i64 %arg1, 15
br i1 undef, label %bb0, label %bb1
@@ -277,7 +277,7 @@ ret:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_high32(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_high32(ptr addrspace(1) %out, i64 %arg1) nounwind {
entry:
%shr = lshr i64 %arg1, 35
br i1 undef, label %bb0, label %bb1
@@ -297,5 +297,3 @@ ret:
store i64 %phi, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 0a0179e866cd33..75f26615de97bd 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define amdgpu_kernel void @v_clamp_add_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -72,7 +72,7 @@ define amdgpu_kernel void @v_clamp_add_src_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_clamp_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_multi_use_src_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -155,7 +155,7 @@ define amdgpu_kernel void @v_clamp_multi_use_src_f32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_clamp_dbg_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_dbg_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_dbg_use_src_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -224,7 +224,7 @@ define amdgpu_kernel void @v_clamp_dbg_use_src_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_clamp_add_neg_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_neg_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_neg_src_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -298,7 +298,7 @@ define amdgpu_kernel void @v_clamp_add_neg_src_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_non_clamp_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_non_clamp_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_non_clamp_max_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -370,7 +370,7 @@ define amdgpu_kernel void @v_non_clamp_max_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_f32_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_add_src_f32_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="ieee.ieee" {
; SI-LABEL: v_clamp_add_src_f32_denormals:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -438,7 +438,7 @@ define amdgpu_kernel void @v_clamp_add_src_f32_denormals(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_f16_denorm:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -508,7 +508,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_denorm(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
+define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_f16_no_denormals:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -578,7 +578,7 @@ define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -650,7 +650,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f32(ptr addrspace(1) %out, ptr addr
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -718,7 +718,7 @@ define amdgpu_kernel void @v_clamp_add_src_f64(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_clamp_mac_to_mad(ptr addrspace(1) %out, ptr addrspace(1) %aptr, float %a) #0 {
+define amdgpu_kernel void @v_clamp_mac_to_mad(ptr addrspace(1) %out, ptr addrspace(1) %aptr, float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_mac_to_mad:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -799,7 +799,7 @@ define amdgpu_kernel void @v_clamp_mac_to_mad(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_denorm:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -878,7 +878,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_no_denormals:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -957,7 +957,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_denorm_neg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1050,7 +1050,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_denorm_neg_lo:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1137,7 +1137,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_denorm_neg_hi:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1224,7 +1224,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_add_src_v2f16_denorm_shuf:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1308,7 +1308,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_no_clamp_add_src_v2f16_f32_src:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1391,7 +1391,7 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_no_clamp_add_packed_src_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1478,7 +1478,7 @@ define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(ptr addrspace(1) %out,
; Since the high bits are zeroed, it probably would be OK in this case
; to use clamp.
-define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_no_clamp_add_src_v2f16_f16_src:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1567,7 +1567,7 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %ou
; FIXME: Worse code pre-gfx9
-define <2 x half> @v_clamp_cvt_pkrtz_src_v2f16_denorm(float %a, float %b) #0 {
+define <2 x half> @v_clamp_cvt_pkrtz_src_v2f16_denorm(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_clamp_cvt_pkrtz_src_v2f16_denorm:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1603,31 +1603,26 @@ define <2 x half> @v_clamp_cvt_pkrtz_src_v2f16_denorm(float %a, float %b) #0 {
ret <2 x half> %clamp
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) nounwind readnone
+declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) nounwind readnone
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "denormal-fp-math-f32"="ieee.ieee" }
-attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 947284506a2970..47abf81087f5fa 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
-define amdgpu_kernel void @v_clamp_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -86,7 +86,7 @@ define amdgpu_kernel void @v_clamp_f32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_neg_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neg_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -168,7 +168,7 @@ define amdgpu_kernel void @v_clamp_neg_f32(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_clamp_negabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negabs_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -252,7 +252,7 @@ define amdgpu_kernel void @v_clamp_negabs_f32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @v_clamp_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negzero_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -346,7 +346,7 @@ define amdgpu_kernel void @v_clamp_negzero_f32(ptr addrspace(1) %out, ptr addrsp
; FIXME: Weird inconsistency in how -0.0 is treated. Accepted if clamp
; matched through med3, not if directly. Is this correct?
-define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negzero_maybe_snan_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -437,7 +437,7 @@ define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_clamp_multi_use_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_multi_use_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_multi_use_max_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -543,7 +543,7 @@ define amdgpu_kernel void @v_clamp_multi_use_max_f32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_clamp_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -625,7 +625,7 @@ define amdgpu_kernel void @v_clamp_f16(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_neg_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neg_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -708,7 +708,7 @@ define amdgpu_kernel void @v_clamp_neg_f16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_clamp_negabs_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negabs_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -793,7 +793,7 @@ define amdgpu_kernel void @v_clamp_negabs_f16(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @v_clamp_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -874,7 +874,7 @@ define amdgpu_kernel void @v_clamp_f64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_neg_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neg_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -956,7 +956,7 @@ define amdgpu_kernel void @v_clamp_neg_f64(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_clamp_negabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negabs_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1040,7 +1040,7 @@ define amdgpu_kernel void @v_clamp_negabs_f64(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_aby_negzero_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1122,7 +1122,7 @@ define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_clamp_med3_aby_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_aby_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_aby_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1201,7 +1201,7 @@ define amdgpu_kernel void @v_clamp_med3_aby_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_med3_bay_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_bay_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_bay_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1280,7 +1280,7 @@ define amdgpu_kernel void @v_clamp_med3_bay_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_med3_yab_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_yab_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_yab_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_clamp_med3_yab_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_med3_yba_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_yba_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_yba_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1438,7 +1438,7 @@ define amdgpu_kernel void @v_clamp_med3_yba_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_med3_ayb_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_ayb_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_ayb_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1517,7 +1517,7 @@ define amdgpu_kernel void @v_clamp_med3_ayb_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_med3_bya_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_bya_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_med3_bya_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1596,7 +1596,7 @@ define amdgpu_kernel void @v_clamp_med3_bya_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_constants_to_one_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constants_to_one_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constants_to_one_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1656,7 +1656,7 @@ define amdgpu_kernel void @v_clamp_constants_to_one_f32(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @v_clamp_constants_to_zero_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constants_to_zero_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constants_to_zero_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1715,7 +1715,7 @@ define amdgpu_kernel void @v_clamp_constants_to_zero_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @v_clamp_constant_preserve_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_preserve_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constant_preserve_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1775,7 +1775,7 @@ define amdgpu_kernel void @v_clamp_constant_preserve_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constant_preserve_denorm_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1835,7 +1835,7 @@ define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_clamp_constant_qnan_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_qnan_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constant_qnan_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1894,7 +1894,7 @@ define amdgpu_kernel void @v_clamp_constant_qnan_f32(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @v_clamp_constant_snan_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_snan_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_constant_snan_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1957,7 +1957,7 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32(ptr addrspace(1) %out) #0 {
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------
-define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2044,7 +2044,7 @@ define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
+define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_f32_snan_dx10clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2126,7 +2126,7 @@ define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
+define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_f32_snan_no_dx10clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2212,7 +2212,7 @@ define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
+define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_f32_snan_no_dx10clamp_nnan_src:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2299,7 +2299,7 @@ define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_aby_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2378,7 +2378,7 @@ define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_bay_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2457,7 +2457,7 @@ define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_yab_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2536,7 +2536,7 @@ define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_yba_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2615,7 +2615,7 @@ define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_ayb_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2694,7 +2694,7 @@ define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_med3_bya_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2773,7 +2773,7 @@ define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(ptr addrspace(1) %out) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_constant_qnan_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2833,7 +2833,7 @@ define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(ptr addrspace
ret void
}
-define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(ptr addrspace(1) %out) nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" {
; GFX6-LABEL: v_clamp_constant_snan_f32_no_dx10_clamp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2893,7 +2893,7 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(ptr addrspace
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2982,7 +2982,7 @@ define amdgpu_kernel void @v_clamp_v2f16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_undef_elt:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3080,7 +3080,7 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_not_zero(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_not_zero(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_not_zero:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3181,7 +3181,7 @@ define amdgpu_kernel void @v_clamp_v2f16_not_zero(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_not_one(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_not_one(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_not_one:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3281,7 +3281,7 @@ define amdgpu_kernel void @v_clamp_v2f16_not_one(ptr addrspace(1) %out, ptr addr
ret void
}
-define amdgpu_kernel void @v_clamp_neg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neg_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3372,7 +3372,7 @@ define amdgpu_kernel void @v_clamp_neg_v2f16(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @v_clamp_negabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_negabs_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3470,7 +3470,7 @@ define amdgpu_kernel void @v_clamp_negabs_v2f16(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @v_clamp_neglo_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neglo_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neglo_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3563,7 +3563,7 @@ define amdgpu_kernel void @v_clamp_neglo_v2f16(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_clamp_neghi_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neghi_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_neghi_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3655,7 +3655,7 @@ define amdgpu_kernel void @v_clamp_neghi_v2f16(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_shuffle:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3745,7 +3745,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_undef_limit_elts0:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3843,7 +3843,7 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_clamp_v2f16_undef_limit_elts1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3941,7 +3941,7 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_clamp_diff_source_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0
+define amdgpu_kernel void @v_clamp_diff_source_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
; GFX6-LABEL: v_clamp_diff_source_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4044,23 +4044,17 @@ define amdgpu_kernel void @v_clamp_diff_source_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) nounwind readnone
+declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
index c656de046fd797..6a704ad4fcc6a6 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir
@@ -9,11 +9,9 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
target triple = "amdgcn--amdpal"
- define amdgpu_cs void @_amdgpu_cs_main(<3 x i32>) #0 {
+ define amdgpu_cs void @_amdgpu_cs_main(<3 x i32>) nounwind "target-cpu"="gfx803" {
ret void
}
-
- attributes #0 = { nounwind "target-cpu"="gfx803" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
index ac4f83b0a01ff3..a0f9e88afdf91d 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
@@ -10,11 +10,9 @@
target triple = "amdgcn--amdpal"
; Function Attrs: nounwind
- define amdgpu_ps void @_amdgpu_ps_main(float %arg) #0 {
+ define amdgpu_ps void @_amdgpu_ps_main(float %arg) nounwind "InitialPSInputAddr"="3842" "target-cpu"="gfx803" {
ret void
}
-
- attributes #0 = { nounwind "InitialPSInputAddr"="3842" "target-cpu"="gfx803" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
index fc2a34662fa88a..3c2131788404a5 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
@@ -18,11 +18,9 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
target triple = "amdgcn--amdpal"
- define amdgpu_ps void @main() #0 {
+ define amdgpu_ps void @main() "target-cpu"="gfx803" {
ret void
}
-
- attributes #0 = { "target-cpu"="gfx803" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
index 9d9fb5685fa482..8afd39c4cca0d4 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
@@ -10,11 +10,9 @@
target triple = "amdgcn--amdpal"
; Function Attrs: nounwind
- define amdgpu_cs void @_amdgpu_cs_main(<3 x i32> %arg) #0 {
+ define amdgpu_cs void @_amdgpu_cs_main(<3 x i32> %arg) nounwind "target-cpu"="gfx803" {
ret void
}
-
- attributes #0 = { nounwind "target-cpu"="gfx803" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 6bc8d29b3bf7c2..e02fbf85e2ba76 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -1007,7 +1007,7 @@ bb.end: ; preds = %bb.then, %bb
ret void
}
-define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
+define void @scc_liveness(i32 %arg) local_unnamed_addr nounwind readnone speculatable {
; GCN-LABEL: scc_liveness:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1431,12 +1431,8 @@ bb12: ; preds = %bb10
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind convergent }
-attributes #2 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index a25078230385ec..eed02d6287f799 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -490,7 +490,5 @@ bb9: ; preds = %bb, %.a
; Function Attrs: nounwind readonly willreturn
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind willreturn memory(argmem: read) }
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32 immarg) nounwind willreturn memory(argmem: read)
diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
index b763bec89eef39..fb56783412489d 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
@@ -20,8 +20,6 @@ bb:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 33c0d90f94a397..12ce95836f6170 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -740,10 +740,8 @@ bb:
ret void
}
-declare i1 @llvm.amdgcn.class.f32(float, i32) #0
+declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index e1e3220cc27554..cb23269753a9e1 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -796,7 +796,7 @@ define i1 @test53(i32 %arg1, i32 %arg2, i32 %arg3) {
ret i1 %and
}
-define i1 @test54(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test54(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test54:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -810,7 +810,7 @@ define i1 @test54(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %or1
}
-define i1 @test55(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test55(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test55:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -824,7 +824,7 @@ define i1 @test55(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %or1
}
-define i1 @test56(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test56(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test56:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -838,7 +838,7 @@ define i1 @test56(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %or1
}
-define i1 @test57(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test57(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test57:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -852,7 +852,7 @@ define i1 @test57(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %or1
}
-define i1 @test58(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test58(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test58:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -874,7 +874,7 @@ define i1 @test58(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %and1
}
-define i1 @test59(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test59(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test59:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -896,7 +896,7 @@ define i1 @test59(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test60(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test60(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test60:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -918,7 +918,7 @@ define i1 @test60(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test61(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test61(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test61:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -957,7 +957,7 @@ define i1 @test62(float %arg1, float %arg2, float %arg3) {
ret i1 %or1
}
-define i1 @test63(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test63(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test63:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -975,7 +975,7 @@ define i1 @test63(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %or1
}
-define i1 @test64(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test64(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1028,7 +1028,7 @@ define i1 @test66(double %arg1, double %arg2, double %arg3) {
ret i1 %and1
}
-define i1 @test67(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test67(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test67:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1045,7 +1045,7 @@ define i1 @test67(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test68(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test68(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test68:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1272,7 +1272,7 @@ define i1 @test77(double %arg1, double %arg2, double %arg3) {
ret i1 %and1
}
-define i1 @test78(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test78(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test78:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1286,7 +1286,7 @@ define i1 @test78(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %or1
}
-define i1 @test79(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test79(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test79:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2068,7 +2068,7 @@ define i1 @test109(float %arg1, float %arg2, float %arg3, float %arg4, float %C)
ret i1 %or3
}
-define i1 @test110(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) #0 {
+define i1 @test110(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test110:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2395,7 +2395,7 @@ define i1 @test117(float %arg1, float %arg2, float %arg3, float %arg4, float %ar
}
-define i1 @test118(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) #0 {
+define i1 @test118(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test118:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2420,7 +2420,7 @@ define i1 @test118(float %arg1, float %arg2, float %arg3, float %arg4, float %C1
ret i1 %and2
}
-define i1 @test119(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) #0 {
+define i1 @test119(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test119:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2445,7 +2445,7 @@ define i1 @test119(float %arg1, float %arg2, float %arg3, float %arg4, float %C1
ret i1 %and2
}
-define i1 @test120(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) #0 {
+define i1 @test120(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test120:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2470,7 +2470,7 @@ define i1 @test120(float %arg1, float %arg2, float %arg3, float %arg4, float %C1
ret i1 %and2
}
-define i1 @test121(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) #0 {
+define i1 @test121(float %arg1, float %arg2, float %arg3, float %arg4, float %C1, float %C2, float %C3, float %C4, float %C) nounwind "amdgpu-ieee"="false" {
; GCN-LABEL: test121:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2495,7 +2495,7 @@ define i1 @test121(float %arg1, float %arg2, float %arg3, float %arg4, float %C1
ret i1 %and2
}
-define i1 @test122(double %arg1, double %arg2, double %arg3) #1 {
+define i1 @test122(double %arg1, double %arg2, double %arg3) nounwind "unsafe-fp-math"="true" "no-nans-fp-math"="true" {
; GCN-LABEL: test122:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2509,7 +2509,7 @@ define i1 @test122(double %arg1, double %arg2, double %arg3) #1 {
ret i1 %or1
}
-define i1 @test123(double %arg1, double %arg2, double %arg3) #1 {
+define i1 @test123(double %arg1, double %arg2, double %arg3) nounwind "unsafe-fp-math"="true" "no-nans-fp-math"="true" {
; GCN-LABEL: test123:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2685,7 +2685,7 @@ define i1 @test133(i32 %arg1, i32 %arg2) {
ret i1 %or
}
-define i1 @test134(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test134(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test134:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2708,7 +2708,7 @@ define i1 @test134(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test135(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test135(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test135:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2786,7 +2786,7 @@ define i1 @test137(float %arg1, float %arg2, float %arg3) {
ret i1 %or1
}
-define i1 @test138(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test138(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test138:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2809,7 +2809,7 @@ define i1 @test138(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test139(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test139(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test139:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2832,7 +2832,7 @@ define i1 @test139(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %and1
}
-define i1 @test140(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test140(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test140:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2855,7 +2855,7 @@ define i1 @test140(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %and1
}
-define i1 @test141(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test141(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test141:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2878,7 +2878,7 @@ define i1 @test141(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %and1
}
-define i1 @test142(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test142(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test142:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2901,7 +2901,7 @@ define i1 @test142(double %arg1, double %arg2, double %arg3) #0 {
ret i1 %or1
}
-define i1 @test143(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test143(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test143:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2924,7 +2924,7 @@ define i1 @test143(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %or1
}
-define i1 @test144(float %arg1, float %arg2, float %arg3) #0 {
+define i1 @test144(float %arg1, float %arg2, float %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test144:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2947,7 +2947,7 @@ define i1 @test144(float %arg1, float %arg2, float %arg3) #0 {
ret i1 %or1
}
-define i1 @test145(double %arg1, double %arg2, double %arg3) #0 {
+define i1 @test145(double %arg1, double %arg2, double %arg3) nounwind "amdgpu-ieee"="false" {
; GFX11-LABEL: test145:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3194,6 +3194,3 @@ declare double @llvm.canonicalize.f64(double)
declare float @llvm.canonicalize.f32(float)
declare half @llvm.canonicalize.f16(half)
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>)
-
-attributes #0 = { nounwind "amdgpu-ieee"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index d94e75c8c8e223..4cefa894722220 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; --------------------------------------------------------------------------------
; i32 compares
@@ -8,8 +8,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}commute_eq_64_i32:
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
-define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -21,8 +21,8 @@ define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_ne_64_i32:
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -36,8 +36,8 @@ define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x3039
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
-define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -49,8 +49,8 @@ define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ugt_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -62,8 +62,8 @@ define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_uge_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
-define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -75,8 +75,8 @@ define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ult_64_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -88,8 +88,8 @@ define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ule_63_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -102,8 +102,8 @@ define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
-define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -115,8 +115,8 @@ define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
; GCN: v_ashrrev_i32_e32 v2, 31, v2
-define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -128,8 +128,8 @@ define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
-define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -141,8 +141,8 @@ define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
-define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -154,8 +154,8 @@ define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addr
; GCN-LABEL: {{^}}commute_sle_5_i32:
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
-define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i32, ptr addrspace(1) %gep.in
@@ -171,8 +171,8 @@ define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_eq_64_i64:
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -184,8 +184,8 @@ define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_ne_64_i64:
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -197,8 +197,8 @@ define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_ugt_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -210,8 +210,8 @@ define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_uge_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -223,8 +223,8 @@ define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ult_64_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -236,8 +236,8 @@ define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ule_63_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -252,8 +252,8 @@ define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x41
; GCN: v_cmp_gt_u64_e32 vcc, [[K]], v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -265,8 +265,8 @@ define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspa
; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -278,8 +278,8 @@ define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -291,8 +291,8 @@ define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}commute_slt_neg16_i64:
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -304,8 +304,8 @@ define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addr
; GCN-LABEL: {{^}}commute_sle_5_i64:
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load i64, ptr addrspace(1) %gep.in
@@ -322,8 +322,8 @@ define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -336,8 +336,8 @@ define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -349,8 +349,8 @@ define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_oge_2.0_f32:
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -362,8 +362,8 @@ define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_olt_2.0_f32:
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -375,8 +375,8 @@ define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ole_2.0_f32:
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -388,8 +388,8 @@ define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_one_2.0_f32:
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -401,8 +401,8 @@ define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ord_2.0_f32:
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
-define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -414,8 +414,8 @@ define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -427,8 +427,8 @@ define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -440,8 +440,8 @@ define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_uge_2.0_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -453,8 +453,8 @@ define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ult_2.0_f32:
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -466,8 +466,8 @@ define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ule_2.0_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -479,8 +479,8 @@ define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_une_2.0_f32:
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
-define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -492,8 +492,8 @@ define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_uno_2.0_f32:
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
-define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load float, ptr addrspace(1) %gep.in
@@ -510,8 +510,8 @@ define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -524,8 +524,8 @@ define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -537,8 +537,8 @@ define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_oge_2.0_f64:
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -550,8 +550,8 @@ define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_olt_2.0_f64:
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -563,8 +563,8 @@ define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ole_2.0_f64:
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -576,8 +576,8 @@ define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_one_2.0_f64:
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -589,8 +589,8 @@ define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ord_2.0_f64:
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
-define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -602,8 +602,8 @@ define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -615,8 +615,8 @@ define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -628,8 +628,8 @@ define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_uge_2.0_f64:
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -641,8 +641,8 @@ define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ult_2.0_f64:
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -654,8 +654,8 @@ define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_ule_2.0_f64:
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -667,8 +667,8 @@ define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_une_2.0_f64:
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -680,8 +680,8 @@ define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}commute_uno_2.0_f64:
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
-define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%val = load double, ptr addrspace(1) %gep.in
@@ -701,7 +701,7 @@ define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrsp
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
-define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) #0 {
+define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) nounwind readnone {
entry:
%stack0 = alloca i32, addrspace(5)
%ptr0 = load volatile ptr addrspace(5), ptr addrspace(1) undef
@@ -710,6 +710,3 @@ entry:
store volatile i32 %ext, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53dd..0c351379f63d7f 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
-define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
+define amdgpu_ps float @main(float %arg0, float %arg1) nounwind {
; SI-LABEL: main:
; SI: ; %bb.0: ; %bb
; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
@@ -43,9 +43,5 @@ bb:
ret float %tmp9
}
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
index d9eca0d1cca9a2..64ad93c40ae7ea 100644
--- a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -1,18 +1,18 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
declare float @llvm.fma.f32(float, float, float) nounwind readnone
; FUNC-LABEL: @commute_add_imm_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_add_imm_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_add_imm_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%x = load float, ptr addrspace(1) %gep.0
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%z = fadd float 2.0, %x.fabs
store float %z, ptr addrspace(1) %out
ret void
@@ -22,11 +22,11 @@ define amdgpu_kernel void @commute_add_imm_fabs_f32(ptr addrspace(1) %out, ptr a
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%x = load float, ptr addrspace(1) %gep.0
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
%z = fmul float 4.0, %x.fneg.fabs
store float %z, ptr addrspace(1) %out
@@ -37,8 +37,8 @@ define amdgpu_kernel void @commute_mul_imm_fneg_fabs_f32(ptr addrspace(1) %out,
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_imm_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_imm_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%x = load float, ptr addrspace(1) %gep.0
%x.fneg = fsub float -0.000000e+00, %x
@@ -53,11 +53,11 @@ define amdgpu_kernel void @commute_mul_imm_fneg_f32(ptr addrspace(1) %out, ptr a
; SI: s_mov_b32 [[K:s[0-9]+]], 0x44800000
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_add_lit_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_add_lit_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%x = load float, ptr addrspace(1) %gep.0
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%z = fadd float 1024.0, %x.fabs
store float %z, ptr addrspace(1) %out
ret void
@@ -68,13 +68,13 @@ define amdgpu_kernel void @commute_add_lit_fabs_f32(ptr addrspace(1) %out, ptr a
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_add_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_add_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%x = load volatile float, ptr addrspace(1) %gep.0
%y = load volatile float, ptr addrspace(1) %gep.1
- %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %y.fabs = call float @llvm.fabs.f32(float %y) nounwind readnone
%z = fadd float %x, %y.fabs
store float %z, ptr addrspace(1) %out
ret void
@@ -85,8 +85,8 @@ define amdgpu_kernel void @commute_add_fabs_f32(ptr addrspace(1) %out, ptr addrs
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%x = load volatile float, ptr addrspace(1) %gep.0
@@ -102,13 +102,13 @@ define amdgpu_kernel void @commute_mul_fneg_f32(ptr addrspace(1) %out, ptr addrs
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_fabs_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_fabs_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%x = load volatile float, ptr addrspace(1) %gep.0
%y = load volatile float, ptr addrspace(1) %gep.1
- %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %y.fabs = call float @llvm.fabs.f32(float %y) nounwind readnone
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
%z = fmul float %x, %y.fabs.fneg
store float %z, ptr addrspace(1) %out
@@ -121,14 +121,14 @@ define amdgpu_kernel void @commute_mul_fabs_fneg_f32(ptr addrspace(1) %out, ptr
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%x = load volatile float, ptr addrspace(1) %gep.0
%y = load volatile float, ptr addrspace(1) %gep.1
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
- %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
+ %y.fabs = call float @llvm.fabs.f32(float %y) nounwind readnone
%z = fmul float %x.fabs, %y.fabs
store float %z, ptr addrspace(1) %out
ret void
@@ -139,14 +139,14 @@ define amdgpu_kernel void @commute_mul_fabs_x_fabs_y_f32(ptr addrspace(1) %out,
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
; SI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @commute_mul_fabs_x_fneg_fabs_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%x = load volatile float, ptr addrspace(1) %gep.0
%y = load volatile float, ptr addrspace(1) %gep.1
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
- %y.fabs = call float @llvm.fabs.f32(float %y) #1
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
+ %y.fabs = call float @llvm.fabs.f32(float %y) nounwind readnone
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
%z = fmul float %x.fabs, %y.fabs.fneg
store float %z, ptr addrspace(1) %out
@@ -176,6 +176,3 @@ define amdgpu_kernel void @fma_a_2.0_neg_b_f32(ptr addrspace(1) %out, ptr addrsp
store float %r3, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
index 8cb7d6651a08c2..ac15ff859c96dd 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -11,7 +11,7 @@
; SICI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x2
; GFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
; GFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
-define amdgpu_vs float @load_i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs float @load_i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds i32, ptr addrspace(6) %p1, i32 2
%r0 = load i32, ptr addrspace(6) %p0
%r1 = load i32, ptr addrspace(6) %gep1
@@ -33,7 +33,7 @@ define amdgpu_vs float @load_i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) in
; GFX9-DAG: s_mov_b32 s1, s3
; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
-define amdgpu_vs <2 x float> @load_v2i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <2 x float> @load_v2i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <2 x i32>, ptr addrspace(6) %p1, i32 2
%r0 = load <2 x i32>, ptr addrspace(6) %p0
%r1 = load <2 x i32>, ptr addrspace(6) %gep1
@@ -52,7 +52,7 @@ define amdgpu_vs <2 x float> @load_v2i32(ptr addrspace(6) inreg %p0, ptr addrspa
; VI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
-define amdgpu_vs <4 x float> @load_v4i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <4 x float> @load_v4i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(6) %p1, i32 2
%r0 = load <4 x i32>, ptr addrspace(6) %p0
%r1 = load <4 x i32>, ptr addrspace(6) %gep1
@@ -71,7 +71,7 @@ define amdgpu_vs <4 x float> @load_v4i32(ptr addrspace(6) inreg %p0, ptr addrspa
; VI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
-define amdgpu_vs <8 x float> @load_v8i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <8 x float> @load_v8i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <8 x i32>, ptr addrspace(6) %p1, i32 2
%r0 = load <8 x i32>, ptr addrspace(6) %p0
%r1 = load <8 x i32>, ptr addrspace(6) %gep1
@@ -90,7 +90,7 @@ define amdgpu_vs <8 x float> @load_v8i32(ptr addrspace(6) inreg %p0, ptr addrspa
; VI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
-define amdgpu_vs <16 x float> @load_v16i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <16 x float> @load_v16i32(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <16 x i32>, ptr addrspace(6) %p1, i32 2
%r0 = load <16 x i32>, ptr addrspace(6) %p0
%r1 = load <16 x i32>, ptr addrspace(6) %gep1
@@ -109,7 +109,7 @@ define amdgpu_vs <16 x float> @load_v16i32(ptr addrspace(6) inreg %p0, ptr addrs
; VI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
; GFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
; GFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
-define amdgpu_vs float @load_float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs float @load_float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds float, ptr addrspace(6) %p1, i32 2
%r0 = load float, ptr addrspace(6) %p0
%r1 = load float, ptr addrspace(6) %gep1
@@ -130,7 +130,7 @@ define amdgpu_vs float @load_float(ptr addrspace(6) inreg %p0, ptr addrspace(6)
; GFX9-DAG: s_mov_b32 s1, s3
; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
-define amdgpu_vs <2 x float> @load_v2float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <2 x float> @load_v2float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <2 x float>, ptr addrspace(6) %p1, i32 2
%r0 = load <2 x float>, ptr addrspace(6) %p0
%r1 = load <2 x float>, ptr addrspace(6) %gep1
@@ -148,7 +148,7 @@ define amdgpu_vs <2 x float> @load_v2float(ptr addrspace(6) inreg %p0, ptr addrs
; VI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
-define amdgpu_vs <4 x float> @load_v4float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <4 x float> @load_v4float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <4 x float>, ptr addrspace(6) %p1, i32 2
%r0 = load <4 x float>, ptr addrspace(6) %p0
%r1 = load <4 x float>, ptr addrspace(6) %gep1
@@ -166,7 +166,7 @@ define amdgpu_vs <4 x float> @load_v4float(ptr addrspace(6) inreg %p0, ptr addrs
; VI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
-define amdgpu_vs <8 x float> @load_v8float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <8 x float> @load_v8float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <8 x float>, ptr addrspace(6) %p1, i32 2
%r0 = load <8 x float>, ptr addrspace(6) %p0
%r1 = load <8 x float>, ptr addrspace(6) %gep1
@@ -184,7 +184,7 @@ define amdgpu_vs <8 x float> @load_v8float(ptr addrspace(6) inreg %p0, ptr addrs
; VI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
; GFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
-define amdgpu_vs <16 x float> @load_v16float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) #0 {
+define amdgpu_vs <16 x float> @load_v16float(ptr addrspace(6) inreg %p0, ptr addrspace(6) inreg %p1) nounwind {
%gep1 = getelementptr inbounds <16 x float>, ptr addrspace(6) %p1, i32 2
%r0 = load <16 x float>, ptr addrspace(6) %p0
%r1 = load <16 x float>, ptr addrspace(6) %gep1
@@ -195,7 +195,7 @@ define amdgpu_vs <16 x float> @load_v16float(ptr addrspace(6) inreg %p0, ptr add
; GCN-LABEL: {{^}}load_i32_hi0:
; GCN: s_mov_b32 s1, 0
; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
-define amdgpu_vs i32 @load_i32_hi0(ptr addrspace(6) inreg %p) #1 {
+define amdgpu_vs i32 @load_i32_hi0(ptr addrspace(6) inreg %p) nounwind "amdgpu-32bit-address-high-bits"="0" {
%r0 = load i32, ptr addrspace(6) %p
ret i32 %r0
}
@@ -203,7 +203,7 @@ define amdgpu_vs i32 @load_i32_hi0(ptr addrspace(6) inreg %p) #1 {
; GCN-LABEL: {{^}}load_i32_hi1:
; GCN: s_mov_b32 s1, 1
; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
-define amdgpu_vs i32 @load_i32_hi1(ptr addrspace(6) inreg %p) #2 {
+define amdgpu_vs i32 @load_i32_hi1(ptr addrspace(6) inreg %p) nounwind "amdgpu-32bit-address-high-bits"="1" {
%r0 = load i32, ptr addrspace(6) %p
ret i32 %r0
}
@@ -211,7 +211,7 @@ define amdgpu_vs i32 @load_i32_hi1(ptr addrspace(6) inreg %p) #2 {
; GCN-LABEL: {{^}}load_i32_hiffff8000:
; GCN: s_movk_i32 s1, 0x8000
; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
-define amdgpu_vs i32 @load_i32_hiffff8000(ptr addrspace(6) inreg %p) #3 {
+define amdgpu_vs i32 @load_i32_hiffff8000(ptr addrspace(6) inreg %p) nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" {
%r0 = load i32, ptr addrspace(6) %p
ret i32 %r0
}
@@ -219,7 +219,7 @@ define amdgpu_vs i32 @load_i32_hiffff8000(ptr addrspace(6) inreg %p) #3 {
; GCN-LABEL: {{^}}load_i32_hifffffff0:
; GCN: s_mov_b32 s1, -16
; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
-define amdgpu_vs i32 @load_i32_hifffffff0(ptr addrspace(6) inreg %p) #4 {
+define amdgpu_vs i32 @load_i32_hifffffff0(ptr addrspace(6) inreg %p) nounwind "amdgpu-32bit-address-high-bits"="0xfffffff0" {
%r0 = load i32, ptr addrspace(6) %p
ret i32 %r0
}
@@ -230,9 +230,9 @@ define amdgpu_vs i32 @load_i32_hifffffff0(ptr addrspace(6) inreg %p) #4 {
; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
-define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) "InitialPSInputAddr"="45175" {
main_body:
- %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
+ %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) nounwind readnone
%23 = bitcast float %22 to i32
%24 = shl i32 %23, 1
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
@@ -241,7 +241,7 @@ main_body:
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
- %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
+ %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) nounwind readnone
%32 = extractelement <4 x float> %31, i32 0
%33 = extractelement <4 x float> %31, i32 1
%34 = extractelement <4 x float> %31, i32 2
@@ -262,9 +262,9 @@ main_body:
; GCN: s_load_dwordx8
; GCN-NEXT: s_load_dwordx4
; GCN: image_sample
-define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform(ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), ptr addrspace(6) inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) "InitialPSInputAddr"="45175" {
main_body:
- %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
+ %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) nounwind readnone
%23 = bitcast float %22 to i32
%24 = shl i32 %23, 1
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
@@ -273,7 +273,7 @@ main_body:
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
- %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
+ %31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) nounwind readnone
%32 = extractelement <4 x float> %31, i32 0
%33 = extractelement <4 x float> %31, i32 1
%34 = extractelement <4 x float> %31, i32 2
@@ -292,7 +292,7 @@ main_body:
; GCN-DAG: s_add_i32 s0, s0, 4
; GCN-DAG: s_mov_b32 s1, 0
; GCN: s_load_dword s{{[0-9]}}, s[0:1], 0x0
-define amdgpu_vs float @load_addr_no_fold(ptr addrspace(6) inreg noalias %p0) #0 {
+define amdgpu_vs float @load_addr_no_fold(ptr addrspace(6) inreg noalias %p0) nounwind {
%gep1 = getelementptr i32, ptr addrspace(6) %p0, i32 1
%r1 = load i32, ptr addrspace(6) %gep1
%r2 = bitcast i32 %r1 to float
@@ -306,26 +306,16 @@ define amdgpu_vs float @load_addr_no_fold(ptr addrspace(6) inreg noalias %p0) #0
define amdgpu_vs float @vgpr_arg_src(ptr addrspace(6) %arg) {
main_body:
%tmp9 = load ptr addrspace(8), ptr addrspace(6) %arg
- %tmp10 = call nsz float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp9, i32 undef, i32 0, i32 0, i32 0) #1
+ %tmp10 = call nsz float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp9, i32 undef, i32 0, i32 0, i32 0) nounwind "amdgpu-32bit-address-high-bits"="0"
ret float %tmp10
}
; Function Attrs: nounwind readnone speculatable
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone speculatable
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind memory(argmem: read)
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #7
+declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) nounwind memory(argmem: read)
!0 = !{}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-32bit-address-high-bits"="0" }
-attributes #2 = { nounwind "amdgpu-32bit-address-high-bits"="1" }
-attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
-attributes #4 = { nounwind "amdgpu-32bit-address-high-bits"="0xfffffff0" }
-attributes #5 = { "InitialPSInputAddr"="45175" }
-attributes #6 = { nounwind readnone speculatable }
-attributes #7 = { nounwind memory(argmem: read) }
-attributes #8 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
index 236dee7c3b825d..a2696c83cb61b8 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
@@ -6,7 +6,7 @@
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_and_0(ptr addrspace(1) %out) {
- %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind
%size = call i32 @llvm.amdgcn.groupstaticsize()
%and = and i32 %size, %x
store i32 %and, ptr addrspace(1) %out
@@ -17,7 +17,7 @@ define amdgpu_kernel void @fold_mi_v_and_0(ptr addrspace(1) %out) {
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fold_mi_s_and_0(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @fold_mi_s_and_0(ptr addrspace(1) %out, i32 %x) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%and = and i32 %size, %x
store i32 %and, ptr addrspace(1) %out
@@ -29,7 +29,7 @@ define amdgpu_kernel void @fold_mi_s_and_0(ptr addrspace(1) %out, i32 %x) #0 {
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_or_0(ptr addrspace(1) %out) {
- %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind
%size = call i32 @llvm.amdgcn.groupstaticsize()
%or = or i32 %size, %x
store i32 %or, ptr addrspace(1) %out
@@ -42,7 +42,7 @@ define amdgpu_kernel void @fold_mi_v_or_0(ptr addrspace(1) %out) {
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @fold_mi_s_or_0(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @fold_mi_s_or_0(ptr addrspace(1) %out, i32 %x) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%or = or i32 %size, %x
store i32 %or, ptr addrspace(1) %out
@@ -54,7 +54,7 @@ define amdgpu_kernel void @fold_mi_s_or_0(ptr addrspace(1) %out, i32 %x) #0 {
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @fold_mi_v_xor_0(ptr addrspace(1) %out) {
- %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, %x
store i32 %xor, ptr addrspace(1) %out
@@ -67,7 +67,7 @@ define amdgpu_kernel void @fold_mi_v_xor_0(ptr addrspace(1) %out) {
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN-NOT: [[VVAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @fold_mi_s_xor_0(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @fold_mi_s_xor_0(ptr addrspace(1) %out, i32 %x) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, %x
store i32 %xor, ptr addrspace(1) %out
@@ -78,7 +78,7 @@ define amdgpu_kernel void @fold_mi_s_xor_0(ptr addrspace(1) %out, i32 %x) #0 {
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fold_mi_s_not_0(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @fold_mi_s_not_0(ptr addrspace(1) %out, i32 %x) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%xor = xor i32 %size, -1
store i32 %xor, ptr addrspace(1) %out
@@ -136,9 +136,6 @@ define amdgpu_kernel void @fold_mi_and_neg1(ptr addrspace(1) %out) {
ret void
}
-declare i64 @llvm.ctpop.i64(i64) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 789150f690d52e..15cc926700df4c 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -63,7 +63,7 @@
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]
; VGPR: .amdhsa_private_segment_fixed_size 16
-define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @divergent_if_endif(ptr addrspace(1) %out) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, ptr addrspace(3) undef
@@ -134,7 +134,7 @@ endif:
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
; VGPR: .amdhsa_private_segment_fixed_size 20
-define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @divergent_loop(ptr addrspace(1) %out) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, ptr addrspace(3) null
@@ -248,7 +248,7 @@ end:
; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
-define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%load0 = load volatile i32, ptr addrspace(3) null
@@ -271,10 +271,7 @@ endif:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll b/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
index 5ceea9ef63a4a5..91b3ddf89a3347 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
@@ -19,7 +19,7 @@
; GCN: {{[s|v]}}_cmp_eq_u32
; GCN: s_cbranch_execz
; GCN-NEXT: s_branch
-define amdgpu_kernel void @copytoreg_divergent_brcond(i32 %arg, i32 %arg1, i32 %arg2) #0 {
+define amdgpu_kernel void @copytoreg_divergent_brcond(i32 %arg, i32 %arg1, i32 %arg2) nounwind optnone noinline {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = zext i32 %tmp to i64
@@ -49,7 +49,4 @@ bb14: ; preds = %bb13, %bb9
br i1 %tmp15, label %bb8.loopexit, label %bb9
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
index 6beccce9400e58..686666f2648519 100644
--- a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -7,7 +7,7 @@
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, [[TOKEN]], csr_amdgpu, {{.*}}
; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} G_SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
-define i32 @basic_call(i32 %src) #0 {
+define i32 @basic_call(i32 %src) nounwind readnone convergent {
%t = call token @llvm.experimental.convergence.entry()
%r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
@@ -19,7 +19,7 @@ define i32 @basic_call(i32 %src) #0 {
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
-define i32 @basic_intrinsic(i32 %src) #0 {
+define i32 @basic_intrinsic(i32 %src) nounwind readnone convergent {
%t = call token @llvm.experimental.convergence.anchor()
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
@@ -27,7 +27,7 @@ define i32 @basic_intrinsic(i32 %src) #0 {
; There's nothing to check here. The test is just meant to catch any crashes
; when a convergent call has no token.
-define i32 @uncontrolled_call(i32 %src) #0 {
+define i32 @uncontrolled_call(i32 %src) nounwind readnone convergent {
%r = call i32 @foo(i32 %src)
ret i32 %r
}
@@ -40,7 +40,7 @@ define i32 @uncontrolled_call(i32 %src) #0 {
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[TOKEN]]
-define i32 @basic_branch(i32 %src, i1 %cond) #0 {
+define i32 @basic_branch(i32 %src, i1 %cond) nounwind readnone convergent {
entry:
%t = call token @llvm.experimental.convergence.anchor()
%x = add i32 %src, 1
@@ -63,7 +63,7 @@ else:
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[LOOP]]
-define i32 @basic_loop(i32 %src, i1 %cond) #0 {
+define i32 @basic_loop(i32 %src, i1 %cond) nounwind readnone convergent {
%t1 = call token @llvm.experimental.convergence.anchor()
br label %loop
@@ -83,7 +83,7 @@ end:
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ANCHOR]]
; ISEL: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[ENTRY]]
; GISEL: {{.*}} = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane){{.*}}, implicit [[ENTRY]]
-define i32 @nested(i32 %src) #0 {
+define i32 @nested(i32 %src) nounwind readnone convergent {
%t1 = call token @llvm.experimental.convergence.entry()
%t2 = call token @llvm.experimental.convergence.anchor()
%r2 = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
@@ -101,20 +101,17 @@ define i32 @nested(i32 %src) #0 {
; COM: ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @external_void_func_void, [[TOKEN]], csr_amdgpu, {{.*}}
; COM: DEADMI: {{.*}} SI_CALL {{.*}}, @external_void_func_void, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
; GISEL: {{.*}} SI_TCRETURN {{.*}}, @external_void_func_void, 0, csr_amdgpu, implicit [[TOKEN]]
-define void @tail_call_void_func_void() #0 {
+define void @tail_call_void_func_void() nounwind readnone convergent {
%t1 = call token @llvm.experimental.convergence.entry()
tail call void @external_void_func_void() [ "convergencectrl"(token %t1) ]
ret void
}
-declare hidden void @external_void_func_void() #0
-declare i32 @foo(i32 %x) #0
+declare hidden void @external_void_func_void() nounwind readnone convergent
+declare i32 @foo(i32 %x) nounwind readnone convergent
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+declare i32 @llvm.amdgcn.readfirstlane(i32) nounwind readnone convergent
declare token @llvm.experimental.convergence.entry()
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
index bd523d4ac30b90..062859b8907a21 100644
--- a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}convergent_inlineasm:
; GCN: %bb.0:
; GCN: v_cmp_ne_u32_e64
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
define amdgpu_kernel void @convergent_inlineasm(ptr addrspace(1) nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) #1
+ %tmp1 = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 1) convergent nounwind readnone
%tmp2 = icmp eq i32 %tmp, 8
br i1 %tmp2, label %bb3, label %bb5
@@ -45,6 +45,3 @@ bb3: ; preds = %bb
bb5: ; preds = %bb3, %bb
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir
index 577d38e656685e..df8d4710fbc3ec 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/copy-vgpr-clobber-spill-vgpr.mir
@@ -4,7 +4,7 @@
--- |
- define amdgpu_kernel void @test_spill() #0 {
+ define amdgpu_kernel void @test_spill() "amdgpu-waves-per-eu"="4,4" "amdgpu-no-agpr" {
; GFX908-LABEL: test_spill:
; GFX908: ; %bb.0:
; GFX908-NEXT: ; implicit-def: $agpr96_agpr97_agpr98_agpr99_agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111
@@ -333,8 +333,6 @@
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="4,4" "amdgpu-no-agpr" }
-
...
---
name: test_spill
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 09dc6d6bff9e31..eb2bdc3763b582 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -23,7 +23,7 @@
; since that didn't look like an ABI copy.
-define float @call_split_type_used_outside_block_v2f32() #0 {
+define float @call_split_type_used_outside_block_v2f32() nounwind {
; GCN-LABEL: call_split_type_used_outside_block_v2f32:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,7 +59,7 @@ bb1:
ret float %extract
}
-define float @call_split_type_used_outside_block_v3f32() #0 {
+define float @call_split_type_used_outside_block_v3f32() nounwind {
; GCN-LABEL: call_split_type_used_outside_block_v3f32:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -95,7 +95,7 @@ bb1:
ret float %extract
}
-define half @call_split_type_used_outside_block_v4f16() #0 {
+define half @call_split_type_used_outside_block_v4f16() nounwind {
; GCN-LABEL: call_split_type_used_outside_block_v4f16:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -131,7 +131,7 @@ bb1:
ret half %extract
}
-define { i32, half } @call_split_type_used_outside_block_struct() #0 {
+define { i32, half } @call_split_type_used_outside_block_struct() nounwind {
; GCN-LABEL: call_split_type_used_outside_block_struct:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -173,7 +173,7 @@ bb1:
ret { i32, half } %ins1
}
-define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
+define amdgpu_kernel void @v3i16_registers(i1 %cond) nounwind {
; GCN-LABEL: v3i16_registers:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_add_u32 flat_scratch_lo, s10, s15
@@ -213,7 +213,7 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %call6 = tail call <3 x i16> @func_v3i16() #0
+ %call6 = tail call <3 x i16> @func_v3i16() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -222,7 +222,7 @@ if.end: ; preds = %if.else, %if.then
ret void
}
-define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
+define amdgpu_kernel void @v3f16_registers(i1 %cond) nounwind {
; GCN-LABEL: v3f16_registers:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_add_u32 flat_scratch_lo, s10, s15
@@ -262,7 +262,7 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %call6 = tail call <3 x half> @func_v3f16() #0
+ %call6 = tail call <3 x half> @func_v3f16() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -271,16 +271,14 @@ if.end: ; preds = %if.else, %if.then
ret void
}
-declare hidden <2 x float> @func_v2f32() #0
-declare hidden <3 x float> @func_v3f32() #0
-declare hidden <4 x float> @func_v4f32() #0
-declare hidden <4 x half> @func_v4f16() #0
+declare hidden <2 x float> @func_v2f32() nounwind
+declare hidden <3 x float> @func_v3f32() nounwind
+declare hidden <4 x float> @func_v4f32() nounwind
+declare hidden <4 x half> @func_v4f16() nounwind
declare hidden <3 x i16> @func_v3i16()
declare hidden <3 x half> @func_v3f16()
-declare hidden { <4 x i32>, <4 x half> } @func_struct() #0
-
-attributes #0 = { nounwind}
+declare hidden { <4 x i32>, <4 x half> } @func_struct() nounwind
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
index c98da812647443..22ff00ed3e3379 100644
--- a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
+++ b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
@@ -11,7 +11,7 @@
; CHECK: BB0_3:
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, [[SREG]]
-define amdgpu_ps void @mov_opt(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
+define amdgpu_ps void @mov_opt(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr nounwind {
bb:
%tmp = icmp eq i32 %arg1, 0
br i1 %tmp, label %bb3, label %bb10
@@ -31,12 +31,9 @@ bb9: ; preds = %bb5
bb10: ; preds = %bb9, %bb5, %bb3, %bb
%tmp11 = phi float [ 1.000000e+00, %bb3 ], [ 0.000000e+00, %bb9 ], [ 1.000000e+00, %bb ], [ undef, %bb5 ]
- call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %tmp11, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
+ call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %tmp11, float undef, float undef, float undef, i1 immarg false, i1 immarg false) nounwind
ret void
}
; Function Attrs: inaccessiblememonly nounwind
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/cube.ll b/llvm/test/CodeGen/AMDGPU/cube.ll
index 49d8276eda4182..ec9feb5b491bb4 100644
--- a/llvm/test/CodeGen/AMDGPU/cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/cube.ll
@@ -1,10 +1,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-declare float @llvm.amdgcn.cubesc(float, float, float) #0
-declare float @llvm.amdgcn.cubetc(float, float, float) #0
-declare float @llvm.amdgcn.cubema(float, float, float) #0
+declare float @llvm.amdgcn.cubeid(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubesc(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubetc(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubema(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}cube:
; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -12,7 +12,7 @@ declare float @llvm.amdgcn.cubema(float, float, float) #0
; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: _store_dwordx4
-define amdgpu_kernel void @cube(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @cube(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%cubeid = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
%cubesc = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
%cubetc = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
@@ -25,6 +25,3 @@ define amdgpu_kernel void @cube(ptr addrspace(1) %out, float %a, float %b, float
store <4 x float> %vec3, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
index c34d669145ddea..3070ca3e4a8f60 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
@@ -2,16 +2,16 @@
; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NOT: add
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: s_endpgm
-define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
- %floor = call float @llvm.floor.f32(float %x) #1
+define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) nounwind {
+ %floor = call float @llvm.floor.f32(float %x) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -22,9 +22,9 @@ define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
; SI: s_endpgm
-define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) nounwind {
%fadd = fadd float %x, 1.0
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -35,9 +35,9 @@ define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
; SI: s_endpgm
-define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 {
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
- %floor = call float @llvm.floor.f32(float %x.fabs) #1
+define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x) nounwind {
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
+ %floor = call float @llvm.floor.f32(float %x.fabs) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -48,9 +48,9 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x)
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
; SI: s_endpgm
-define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x) nounwind {
%x.fneg = fsub float -0.000000e+00, %x
- %floor = call float @llvm.floor.f32(float %x.fneg) #1
+ %floor = call float @llvm.floor.f32(float %x.fneg) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -61,10 +61,10 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x)
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
; SI: s_endpgm
-define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 {
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) nounwind {
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
- %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
+ %floor = call float @llvm.floor.f32(float %x.fabs.fneg) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -75,12 +75,9 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
; SI: v_floor_f32
; SI: v_cvt_u32_f32_e32
; SI: s_endpgm
-define amdgpu_kernel void @no_cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
- %floor = call float @llvm.floor.f32(float %x) #1
+define amdgpu_kernel void @no_cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) nounwind {
+ %floor = call float @llvm.floor.f32(float %x) nounwind readnone
%cvt = fptoui float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
index d4bafa12af1293..9711a02c74ebc0 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
@@ -2,16 +2,16 @@
; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
; SI-SAFE-NOT: v_cvt_rpi_i32_f32
; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; SI: s_endpgm
-define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) nounwind {
%fadd = fadd float %x, 0.5
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -21,10 +21,10 @@ define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
; SI-SAFE-NOT: v_cvt_rpi_i32_f32
; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 {
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x) nounwind {
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%fadd = fadd float %x.fabs, 0.5
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -37,10 +37,10 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x)
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
; SI: s_endpgm
-define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x) nounwind {
%x.fneg = fsub float -0.000000e+00, %x
%fadd = fadd float %x.fneg, 0.5
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -55,11 +55,11 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x)
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
; SI: s_endpgm
-define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 {
- %x.fabs = call float @llvm.fabs.f32(float %x) #1
+define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) nounwind {
+ %x.fabs = call float @llvm.fabs.f32(float %x) nounwind readnone
%x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
%fadd = fadd float %x.fabs.fneg, 0.5
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptosi float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
@@ -71,13 +71,10 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
; SI: v_floor_f32
; SI: v_cvt_u32_f32
; SI: s_endpgm
-define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x) nounwind {
%fadd = fadd float %x, 0.5
- %floor = call float @llvm.floor.f32(float %fadd) #1
+ %floor = call float @llvm.floor.f32(float %fadd) nounwind readnone
%cvt = fptoui float %floor to i32
store i32 %cvt, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 451f64f71282b2..6a1bb985508adb 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11 %s
-define amdgpu_ps float @_amdgpu_ps_main() #0 {
+define amdgpu_ps float @_amdgpu_ps_main() "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: _amdgpu_ps_main:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
@@ -236,7 +236,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
ret float %.i2551
}
-define float @fmac_sequence_simple(float %a, float %b, float %c, float %d, float %e) #0 {
+define float @fmac_sequence_simple(float %a, float %b, float %c, float %d, float %e) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fmac_sequence_simple:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -260,7 +260,7 @@ define float @fmac_sequence_simple(float %a, float %b, float %c, float %d, float
ret float %t5
}
-define float @fmac_sequence_innermost_fmul(float %a, float %b, float %c, float %d, float %e, float %f, float %g) #0 {
+define float @fmac_sequence_innermost_fmul(float %a, float %b, float %c, float %d, float %e, float %f, float %g) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fmac_sequence_innermost_fmul:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -289,7 +289,7 @@ define float @fmac_sequence_innermost_fmul(float %a, float %b, float %c, float %
ret float %t5
}
-define float @fmac_sequence_innermost_fmul_swapped_operands(float %a, float %b, float %c, float %d, float %e, float %f, float %g) #0 {
+define float @fmac_sequence_innermost_fmul_swapped_operands(float %a, float %b, float %c, float %d, float %e, float %f, float %g) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fmac_sequence_innermost_fmul_swapped_operands:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -318,7 +318,7 @@ define float @fmac_sequence_innermost_fmul_swapped_operands(float %a, float %b,
ret float %t5
}
-define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) #0 {
+define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fmac_sequence_innermost_fmul_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mac_f32_e64 v0, s2, s3
@@ -342,7 +342,7 @@ define amdgpu_ps float @fmac_sequence_innermost_fmul_sgpr(float inreg %a, float
ret float %t5
}
-define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) #0 {
+define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a, float inreg %b, float inreg %c, float inreg %d, float inreg %e, float inreg %f, float %g) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fmac_sequence_innermost_fmul_multiple_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mul_f32_e64 v1, s2, s3
@@ -376,7 +376,7 @@ define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a
; "fmul %m, 2.0" could select to an FMA instruction, but it is no better than
; selecting it as a multiply. In some cases the multiply is better because
; SIFoldOperands can fold it into a previous instruction as an output modifier.
-define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) #0 {
+define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) "denormal-fp-math-f32"="preserve-sign" {
; GFX10-LABEL: fma_vs_output_modifier:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_cvt_f32_i32_e64 v1, v1 mul:2
@@ -398,7 +398,7 @@ define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) #0 {
ret float %r
}
-define amdgpu_ps float @fma_vs_output_modifier_2(float %x) #0 {
+define amdgpu_ps float @fma_vs_output_modifier_2(float %x) "denormal-fp-math-f32"="preserve-sign" {
; GCN-LABEL: fma_vs_output_modifier_2:
; GCN: ; %bb.0:
; GCN-NEXT: v_mul_f32_e64 v0, v0, v0 mul:2
@@ -409,30 +409,25 @@ define amdgpu_ps float @fma_vs_output_modifier_2(float %x) #0 {
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.maxnum.f32(float, float) #1
+declare float @llvm.maxnum.f32(float, float) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #2
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone speculatable willreturn
; Function Attrs: nounwind readonly willreturn
-declare <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
; Function Attrs: nounwind readonly willreturn
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly willreturn
; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #3
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readonly willreturn
; Function Attrs: nounwind readnone willreturn
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) #3
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign" }
-attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind readnone speculatable willreturn }
-attributes #3 = { nounwind readonly willreturn }
+declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
index 18b250d148f5a0..0bacf3c7b3e160 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
@@ -11,7 +11,7 @@
define amdgpu_kernel void @store_same_base_ptr(ptr addrspace(1) %out) {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%offset = sext i32 %id to i64
%offset0 = add i64 %offset, 1027
%ptr0 = getelementptr i32, ptr addrspace(1) %out, i64 %offset0
@@ -28,6 +28,4 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index cdd4c72f3717f0..7fd7e6db2b7dfb 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -2,16 +2,13 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
--- |
- declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable
- define amdgpu_kernel void @could_not_use_debug_inst_to_query_mi2mimap() #1 {
+ define amdgpu_kernel void @could_not_use_debug_inst_to_query_mi2mimap() nounwind {
ret void
}
- declare hidden float @foo(float, float, float) local_unnamed_addr #1
-
- attributes #0 = { nounwind readnone speculatable }
- attributes #1 = {nounwind }
+ declare hidden float @foo(float, float, float) local_unnamed_addr nounwind
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index 127b23ed3bb23a..834be3ad321919 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -2,13 +2,13 @@
%struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] }
-define amdgpu_kernel void @wobble(ptr addrspace(1) nocapture readonly %arg) #0 !dbg !4 {
+define amdgpu_kernel void @wobble(ptr addrspace(1) nocapture readonly %arg) convergent nounwind "target-cpu"="gfx900" !dbg !4 {
bb:
%tmp = load i32, ptr addrspace(1) undef, align 4
%tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
%tmp2 = sext i32 %tmp to i64
%tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
- %tmp4 = call float @barney() #2
+ %tmp4 = call float @barney() nounwind
%tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
%tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
%tmp11 = sext i32 %tmp10 to i64
@@ -68,26 +68,22 @@ bb28: ; preds = %bb25, %bb21
; CHECK-NOT: ;DEBUG_VALUE:
; CHECK: ;DEBUG_VALUE: foo:var <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef]
; CHECK-NOT: ;DEBUG_VALUE:
- call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
+ call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) nounwind, !dbg !5
%tmp59 = bitcast i64 %tmp35 to <2 x float>
%tmp60 = insertelement <2 x float> undef, float %tmp58, i32 0
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
%tmp62 = fmul <2 x float> %tmp61, undef
%tmp63 = fsub <2 x float> %tmp62, %tmp59
%tmp64 = extractelement <2 x float> %tmp63, i64 0
- call void @eggs(float %tmp64) #2
+ call void @eggs(float %tmp64) nounwind
store <2 x float> %tmp3, ptr addrspace(1) undef, align 8
store float 0.000000e+00, ptr addrspace(1) undef, align 4
ret void
}
-declare float @barney() #2
-declare void @eggs(float) #2
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { convergent nounwind "target-cpu"="gfx900" }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind }
+declare float @barney() nounwind
+declare void @eggs(float) nounwind
+declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}
diff --git a/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll
index 67061bcb2a785d..4773fb005febe8 100644
--- a/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-flat-work-group-size-overrides-waves-per-eu.ll
@@ -16,7 +16,7 @@
; CHECK-LABEL: @no_flat_workgroup_size(
; CHECK: alloca [5 x i32]
; CHECK: store i32 4, ptr addrspace(5) %arrayidx1, align 4
-define amdgpu_kernel void @no_flat_workgroup_size(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @no_flat_workgroup_size(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) "amdgpu-waves-per-eu"="1,1" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -38,7 +38,7 @@ entry:
; CHECK-LABEL: @explicit_default_workgroup_size(
; CHECK: alloca [5 x i32]
; CHECK: store i32 4, ptr addrspace(5) %arrayidx1, align 4
-define amdgpu_kernel void @explicit_default_workgroup_size(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #1 {
+define amdgpu_kernel void @explicit_default_workgroup_size(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1024" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -56,6 +56,3 @@ entry:
store i32 %3, ptr addrspace(1) %arrayidx13
ret void
}
-
-attributes #0 = { "amdgpu-waves-per-eu"="1,1" }
-attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1024" }
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index 49486adda6f2e5..20a43e3f846c4a 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}test_default_si:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_default_si(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
+define amdgpu_kernel void @test_default_si(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "target-cpu"="tahiti" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -12,7 +12,7 @@ define amdgpu_kernel void @test_default_si(ptr addrspace(1) %out0, ptr addrspace
; GCN-LABEL: {{^}}test_default_vi:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 {
+define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "target-cpu"="fiji" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -21,7 +21,7 @@ define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace
; GCN-LABEL: {{^}}test_f64_denormals:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 {
+define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrsp
; GCN-LABEL: {{^}}test_f32_denormals:
; GCNL: FloatMode: 48
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #3 {
+define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -39,7 +39,7 @@ define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrsp
; GCN-LABEL: {{^}}test_f32_f64_denormals:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 {
+define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -48,7 +48,7 @@ define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr ad
; GCN-LABEL: {{^}}test_no_denormals
; GCN: FloatMode: 0
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 {
+define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -57,7 +57,7 @@ define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspa
; GCN-LABEL: {{^}}test_f16_f64_denormals:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 {
+define amdgpu_kernel void @test_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,ieee" {
store half 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -66,7 +66,7 @@ define amdgpu_kernel void @test_f16_f64_denormals(ptr addrspace(1) %out0, ptr ad
; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
; GCN: FloatMode: 48
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_no_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 {
+define amdgpu_kernel void @test_no_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" {
store half 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -75,7 +75,7 @@ define amdgpu_kernel void @test_no_f16_f64_denormals(ptr addrspace(1) %out0, ptr
; GCN-LABEL: {{^}}test_f32_f16_f64_denormals:
; GCN: FloatMode: 240
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_f32_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2) #8 {
+define amdgpu_kernel void @test_f32_f16_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2) nounwind "denormal-fp-math"="ieee,ieee" {
store half 0.0, ptr addrspace(1) %out0
store float 0.0, ptr addrspace(1) %out1
store double 0.0, ptr addrspace(1) %out2
@@ -85,7 +85,7 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(ptr addrspace(1) %out0, pt
; GCN-LABEL: {{^}}test_just_f32_attr_flush
; GCN: FloatMode: 192
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_just_f32_attr_flush(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #9 {
+define amdgpu_kernel void @test_just_f32_attr_flush(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -94,7 +94,7 @@ define amdgpu_kernel void @test_just_f32_attr_flush(ptr addrspace(1) %out0, ptr
; GCN-LABEL: {{^}}test_flush_all_outputs:
; GCN: FloatMode: 80
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_all_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #10 {
+define amdgpu_kernel void @test_flush_all_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="preserve-sign,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -103,7 +103,7 @@ define amdgpu_kernel void @test_flush_all_outputs(ptr addrspace(1) %out0, ptr ad
; GCN-LABEL: {{^}}test_flush_all_inputs:
; GCN: FloatMode: 160
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_all_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #11 {
+define amdgpu_kernel void @test_flush_all_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -112,7 +112,7 @@ define amdgpu_kernel void @test_flush_all_inputs(ptr addrspace(1) %out0, ptr add
; GCN-LABEL: {{^}}test_flush_f32_inputs:
; GCN: FloatMode: 224
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_f32_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #12 {
+define amdgpu_kernel void @test_flush_f32_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="ieee,preserve-sign" "denormal-fp-math"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -121,7 +121,7 @@ define amdgpu_kernel void @test_flush_f32_inputs(ptr addrspace(1) %out0, ptr add
; GCN-LABEL: {{^}}test_flush_f32_outputs:
; GCN: FloatMode: 208
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_f32_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #13 {
+define amdgpu_kernel void @test_flush_f32_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="preserve-sign,ieee" "denormal-fp-math"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -130,7 +130,7 @@ define amdgpu_kernel void @test_flush_f32_outputs(ptr addrspace(1) %out0, ptr ad
; GCN-LABEL: {{^}}test_flush_f64_inputs:
; GCN: FloatMode: 176
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_f64_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #14 {
+define amdgpu_kernel void @test_flush_f64_inputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -139,7 +139,7 @@ define amdgpu_kernel void @test_flush_f64_inputs(ptr addrspace(1) %out0, ptr add
; GCN-LABEL: {{^}}test_flush_f64_outputs:
; GCN: FloatMode: 112
; GCN: IeeeMode: 1
-define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #15 {
+define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-f32"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -169,20 +169,3 @@ entry:
}
declare void @llvm.amdgcn.kill(i1)
-
-attributes #0 = { nounwind "target-cpu"="tahiti" }
-attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
-attributes #4 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #9 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #10 = { nounwind "denormal-fp-math"="preserve-sign,ieee" }
-attributes #11 = { nounwind "denormal-fp-math"="ieee,preserve-sign" }
-attributes #12 = { nounwind "denormal-fp-math-f32"="ieee,preserve-sign" "denormal-fp-math"="ieee,ieee" }
-attributes #13 = { nounwind "denormal-fp-math-f32"="preserve-sign,ieee" "denormal-fp-math"="ieee,ieee" }
-attributes #14 = { nounwind "denormal-fp-math"="ieee,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
-attributes #15 = { nounwind "denormal-fp-math"="preserve-sign,ieee" "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
index 730df533b731a8..26b86a61eaec61 100644
--- a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
+++ b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
@@ -36,7 +36,7 @@ bb:
; GCN-LABEL: {{^}}name:{{[ ]*}}no_vector_clause
; GCN-NOT: BUNDLE
; GCN-NOT: KILL
-define amdgpu_kernel void @no_vector_clause(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1) #0 {
+define amdgpu_kernel void @no_vector_clause(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1) "amdgpu-max-memory-clause"="1" {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp to i64
@@ -64,5 +64,3 @@ bb:
declare i32 @llvm.amdgcn.workitem.id.x()
-attributes #0 = { "amdgpu-max-memory-clause"="1" }
-
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll b/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
index af274a9ab1c82b..992bc47e3221d4 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
@@ -11,7 +11,7 @@
; GCN-NOT: v_readfirstlane
; PRE-GFX9: flat_load_dword
; GFX9: global_load
-define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) local_unnamed_addr #0 {
+define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) local_unnamed_addr nounwind readnone speculatable {
.entry:
%tmp = add i32 %arg4, %arg8
%tmp9 = sext i32 %tmp to i64
@@ -19,12 +19,9 @@ define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1
%tmp11 = load <2 x float>, ptr addrspace(4) %tmp10, align 8
%tmp12 = fadd nnan arcp contract <2 x float> zeroinitializer, %tmp11
%tmp13 = extractelement <2 x float> %tmp12, i32 1
- call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float undef, float %tmp13, float 0.000000e+00, float 1.000000e+00, i1 true, i1 false) #1
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float undef, float %tmp13, float 0.000000e+00, float 1.000000e+00, i1 true, i1 false) nounwind
ret void
}
-declare i64 @llvm.amdgcn.s.getpc() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
index 243618ae5a63ee..e4ae0fbd3760e9 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
@@ -10,9 +10,9 @@
; GCN-NOT: v_readfirstlane
; PRE-GFX9: flat_load_dword
; GFX9: global_load
-define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_addr #0 {
+define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_addr nounwind "InitialPSInputAddr"="0" {
.entry:
- %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg) #1
+ %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg) nounwind readnone speculatable
%tmp1 = bitcast float %tmp to i32
%tmp2 = srem i32 %tmp1, 4
%tmp3 = select i1 false, i32 undef, i32 %tmp2
@@ -20,15 +20,11 @@ define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_a
%tmp5 = getelementptr [4 x <4 x float>], ptr addrspace(4) @0, i64 0, i64 %tmp4
%tmp6 = load <4 x float>, ptr addrspace(4) %tmp5, align 16
%tmp7 = extractelement <4 x float> %tmp6, i32 3
- %tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp7) #1
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> undef, <2 x half> %tmp8, i1 true, i1 true) #2
+ %tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp7) nounwind readnone speculatable
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> undef, <2 x half> %tmp8, i1 true, i1 true) nounwind
ret void
}
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #2
-
-attributes #0 = { nounwind "InitialPSInputAddr"="0" }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind }
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone speculatable
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone speculatable
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
index 2ef7bfb9b857f6..cd417479320670 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
@@ -4,7 +4,7 @@ target datalayout = "n32"
; CHECK-LABEL: @switch_unreachable_default
-define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind readnone {
centry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
switch i32 %tid, label %sw.default [
@@ -59,7 +59,4 @@ sw.exit:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { convergent noinline optnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index c135acaaf4744b..97616f9ab35193 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -65,7 +65,4 @@ define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
index a7f3c18accc93d..5a8521eb7826d8 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: name: uniform_add_SIC
; GCN: S_SUB_I32 killed %{{[0-9]+}}, 32
-define amdgpu_kernel void @uniform_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @uniform_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i32, ptr addrspace(1) %in
%result = add i32 %a, -32
store i32 %result, ptr addrspace(1) %out
@@ -13,7 +13,7 @@ define amdgpu_kernel void @uniform_add_SIC(ptr addrspace(1) %out, ptr addrspace(
; GCN-LABEL: name: divergent_add_SIC
; SI: V_SUB_CO_U32_e64 killed %{{[0-9]+}}, 32
; GFX900: V_SUB_U32_e64 killed %{{[0-9]+}}, 32
-define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
%a = load volatile i32, ptr addrspace(1) %gep
@@ -22,7 +22,4 @@ define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspac
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
index 8c3155fc5c6ea8..8ee3a8785afb4b 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
-define amdgpu_kernel void @uniform_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @uniform_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GCN-LABEL: uniform_sext_in_reg_i8_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -22,7 +22,7 @@ define amdgpu_kernel void @uniform_sext_in_reg_i8_to_i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @divergent_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @divergent_sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GCN-LABEL: divergent_sext_in_reg_i8_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -45,7 +45,7 @@ define amdgpu_kernel void @divergent_sext_in_reg_i8_to_i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @uniform_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @uniform_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GCN-LABEL: uniform_sext_in_reg_i16_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -66,7 +66,7 @@ define amdgpu_kernel void @uniform_sext_in_reg_i16_to_i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @divergent_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @divergent_sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; GCN-LABEL: divergent_sext_in_reg_i16_to_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -89,7 +89,4 @@ define amdgpu_kernel void @divergent_sext_in_reg_i16_to_i32(ptr addrspace(1) %ou
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 757458363284c6..780c673133230c 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -122,14 +122,10 @@ endloop: ; preds = %if1, %Flow2
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.sqrt.f32(float) #0
+declare float @llvm.sqrt.f32(float) nounwind readnone speculatable willreturn
; Function Attrs: nounwind readnone speculatable
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
+declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) nounwind readnone speculatable
; Function Attrs: inaccessiblememonly nounwind writeonly
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
-
-attributes #0 = { nounwind readnone speculatable willreturn }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { inaccessiblememonly nounwind writeonly }
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) inaccessiblememonly nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
index b4218bc2afc7f8..3c607768fd5f0e 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i64 %tmp0 to double
%round = tail call double @llvm.ceil.f64(double %tmp1)
%tmp2 = bitcast double %round to i64
@@ -27,7 +27,7 @@ define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i64 %tmp0 to double
%rcp = call double @llvm.amdgcn.rcp.f64(double %tmp1)
%tmp2 = bitcast double %rcp to i64
@@ -42,7 +42,7 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(ptr addrspace(1) %arg, i64
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i64 %tmp0 to double
%rcp = fdiv fast double 1.0, %tmp1
%tmp2 = bitcast double %rcp to i64
@@ -61,7 +61,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 337, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i64 %tmp0 to double
%rcp = fdiv double 15.0, %tmp1
%tmp2 = bitcast double %rcp to i64
@@ -70,8 +70,6 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
}
declare i32 @llvm.amdgcn.workitem.id.x()
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
+declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) nounwind readnone convergent
declare double @llvm.ceil.f64(double)
declare double @llvm.amdgcn.rcp.f64(double)
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
index cfc166ec798f45..b9f8086a7e0fd0 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
@@ -10,7 +10,7 @@ define amdgpu_kernel void @dpp_add(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id
%load = load i32, ptr addrspace(1) %gep
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) nounwind readnone convergent
%add = add i32 %tmp0, %load
store i32 %add, ptr addrspace(1) %gep
ret void
@@ -23,7 +23,7 @@ define amdgpu_kernel void @dpp_ceil(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id
%load = load i32, ptr addrspace(1) %gep
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i32 %tmp0 to float
%round = tail call float @llvm.ceil.f32(float %tmp1)
%tmp2 = bitcast float %round to i32
@@ -38,7 +38,7 @@ define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id
%load = load i32, ptr addrspace(1) %gep
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) nounwind readnone convergent
%tmp1 = bitcast i32 %tmp0 to float
%t = bitcast i32 %load to float
%add = fadd float %tmp1, %t
@@ -49,7 +49,5 @@ define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) {
declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) nounwind readnone convergent
declare float @llvm.ceil.f32(float)
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
index 91962c19dac8de..4545bc0a6e14b8 100644
--- a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -9,9 +9,9 @@
; GCN: buffer_load_dword
; GCN: ds_write2_b32
; GCN: s_endpgm
-define amdgpu_kernel void @reschedule_global_load_lds_store(ptr addrspace(1) noalias %gptr0, ptr addrspace(1) noalias %gptr1, ptr addrspace(3) noalias %lptr, i32 %c) #0 {
+define amdgpu_kernel void @reschedule_global_load_lds_store(ptr addrspace(1) noalias %gptr0, ptr addrspace(1) noalias %gptr1, ptr addrspace(3) noalias %lptr, i32 %c) nounwind {
entry:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx = shl i32 %tid, 2
%gep0 = getelementptr i32, ptr addrspace(1) %gptr0, i32 %idx
%gep1 = getelementptr i32, ptr addrspace(1) %gptr1, i32 %idx
@@ -41,8 +41,4 @@ exit: ; preds = %for.body, %entry
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
index 26418b07be82b5..b58db691da3956 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
; Function Attrs: nounwind
; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
@@ -23,9 +23,9 @@ declare void @llvm.amdgcn.s.barrier() #1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:34
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
-define amdgpu_kernel void @signed_ds_offset_addressing_loop(ptr addrspace(1) noalias nocapture %out, ptr addrspace(3) noalias nocapture readonly %lptr, i32 %n) #2 {
+define amdgpu_kernel void @signed_ds_offset_addressing_loop(ptr addrspace(1) noalias nocapture %out, ptr addrspace(3) noalias nocapture readonly %lptr, i32 %n) nounwind {
entry:
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%mul = shl nsw i32 %x.i, 1
br label %for.body
@@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry
%sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
%offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
%k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- tail call void @llvm.amdgcn.s.barrier() #1
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
%arrayidx = getelementptr inbounds float, ptr addrspace(3) %lptr, i32 %offset.02
%tmp = load float, ptr addrspace(3) %arrayidx, align 4
%add1 = add nsw i32 %offset.02, 2
@@ -64,7 +64,3 @@ for.end: ; preds = %for.body
store float %add13, ptr addrspace(1) %arrayidx15, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 7b9b130e1cf796..5c084c992fca56 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -4,11 +4,11 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@lds.obj = addrspace(3) global [256 x i32] undef, align 4
-define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
+define amdgpu_kernel void @write_ds_sub0_offset0_global() nounwind readnone {
; CI-LABEL: write_ds_sub0_offset0_global:
; CI: ; %bb.0: ; %entry
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -42,7 +42,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
; GFX11-NEXT: ds_store_b32 v0, v1 offset:12
; GFX11-NEXT: s_endpgm
entry:
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind
%sub1 = sub i32 0, %x.i
%tmp0 = getelementptr [256 x i32], ptr addrspace(3) @lds.obj, i32 0, i32 %sub1
%arrayidx = getelementptr inbounds i32, ptr addrspace(3) %tmp0, i32 3
@@ -50,7 +50,7 @@ entry:
ret void
}
-define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 {
+define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) nounwind readnone {
; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
; CI: ; %bb.0: ; %entry
; CI-NEXT: s_load_dword s0, s[0:1], 0x0
@@ -122,7 +122,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
entry:
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind
%sub1 = sub i32 0, %x.i
%tmp0 = getelementptr [256 x i32], ptr addrspace(3) @lds.obj, i32 0, i32 %sub1
%arrayidx = getelementptr inbounds i32, ptr addrspace(3) %tmp0, i32 3
@@ -132,7 +132,7 @@ entry:
ret void
}
-define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy.val) #0 {
+define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy.val) nounwind readnone {
; CI-LABEL: write_ds_sub_max_offset_global_clamp_bit:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[0:1], 0x0
@@ -198,7 +198,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind
%sub1 = sub i32 -1, %x.i
%tmp0 = getelementptr [256 x i32], ptr addrspace(3) @lds.obj, i32 0, i32 %sub1
%arrayidx = getelementptr inbounds i32, ptr addrspace(3) %tmp0, i32 16383
@@ -208,7 +208,7 @@ define amdgpu_kernel void @write_ds_sub_max_offset_global_clamp_bit(float %dummy
ret void
}
-define amdgpu_kernel void @add_x_shl_max_offset() #1 {
+define amdgpu_kernel void @add_x_shl_max_offset() nounwind {
; CI-LABEL: add_x_shl_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 4, v0
@@ -247,7 +247,7 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
; this could have the offset transform, but sub became xor
-define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -291,7 +291,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
; this could have the offset transform, but sub became xor
-define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -324,7 +324,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 65535, %shl
@@ -333,7 +333,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -366,7 +366,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
; GFX11-NEXT: ds_store_b8 v0, v1
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 65536, %shl
@@ -375,7 +375,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -412,7 +412,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: ds_store_b32 v0, v1 offset:456
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add0 = add i32 123, %shl
@@ -424,7 +424,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_multi_use_same_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -461,7 +461,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 123, %shl
@@ -471,7 +471,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -509,7 +509,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fb, v0
; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 1019, %shl
@@ -518,7 +518,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[0:1], 0x0
@@ -594,7 +594,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 1019, %shl
@@ -605,7 +605,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
ret void
}
-define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
+define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() nounwind {
; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -643,7 +643,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x3fc, v0
; GFX11-NEXT: ds_store_2addr_b32 v0, v1, v2 offset1:1
; GFX11-NEXT: s_endpgm
- %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %x.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
%add = add i32 1020, %shl
@@ -653,7 +653,3 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #
}
declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 777a8f3fef1c17..fc2aa4a46466a3 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -9,7 +9,7 @@
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
-define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -33,7 +33,7 @@ define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
@@ -45,7 +45,7 @@ define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @simple_read2_f32_max_offset(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_max_offset(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -69,7 +69,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(ptr addrspace(1) %out) #0
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
@@ -81,7 +81,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(ptr addrspace(1) %out) #0
ret void
}
-define amdgpu_kernel void @simple_read2_f32_too_far(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_too_far(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_too_far:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -107,7 +107,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
@@ -119,7 +119,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @simple_read2_f32_x2(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_x2(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_x2:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -149,7 +149,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v4, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%idx.0 = add nsw i32 %tid.x, 0
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -175,7 +175,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(ptr addrspace(1) %out) #0 {
}
; Make sure there is an instruction between the two sets of reads.
-define amdgpu_kernel void @simple_read2_f32_x2_barrier(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_x2_barrier(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_x2_barrier:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -209,7 +209,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(ptr addrspace(1) %out) #0
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v4, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%idx.0 = add nsw i32 %tid.x, 0
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -219,7 +219,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(ptr addrspace(1) %out) #0
%val1 = load float, ptr addrspace(3) %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
- call void @llvm.amdgcn.s.barrier() #2
+ call void @llvm.amdgcn.s.barrier() convergent nounwind
%idx.2 = add nsw i32 %tid.x, 11
%arrayidx2 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %idx.2
@@ -238,7 +238,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(ptr addrspace(1) %out) #0
; For some reason adding something to the base address for the first
; element results in only folding the inner pair.
-define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_x2_nonzero_base:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -268,7 +268,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(ptr addrspace(1) %ou
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v4, v0, s[0:1] offset:8
; GFX9-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -298,7 +298,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(ptr addrspace(1) %ou
; merge.
; Base pointers come from different subregister of same super
; register. We can't safely merge this.
-define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(ptr addrspace(1) %out, <2 x ptr addrspace(3)> %lds.ptr) #0 {
+define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(ptr addrspace(1) %out, <2 x ptr addrspace(3)> %lds.ptr) nounwind {
; CI-LABEL: read2_ptr_is_subreg_arg_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -330,7 +330,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(ptr addrspace(1) %out, <2
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
%gep = getelementptr inbounds float, <2 x ptr addrspace(3)> %lds.ptr, <2 x i32> %index.1
@@ -349,7 +349,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(ptr addrspace(1) %out, <2
; are rejecting merges that have the same, constant 0 offset, so make
; sure we are really rejecting it because of the different
; subregisters.
-define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(ptr addrspace(1) %out, <2 x ptr addrspace(3)> %lds.ptr) #0 {
+define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(ptr addrspace(1) %out, <2 x ptr addrspace(3)> %lds.ptr) nounwind {
; CI-LABEL: read2_ptr_is_subreg_arg_offset_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
@@ -381,7 +381,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(ptr addrspace(1) %
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
%gep = getelementptr inbounds float, <2 x ptr addrspace(3)> %lds.ptr, <2 x i32> %index.1
@@ -400,7 +400,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @read2_ptr_is_subreg_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @read2_ptr_is_subreg_f32(ptr addrspace(1) %out) nounwind {
; CI-LABEL: read2_ptr_is_subreg_f32:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -424,7 +424,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%ptr.0 = insertelement <2 x ptr addrspace(3)> undef, ptr addrspace(3) @lds, i32 0
%ptr.1 = insertelement <2 x ptr addrspace(3)> %ptr.0, ptr addrspace(3) @lds, i32 1
%x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -442,7 +442,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @simple_read2_f32_volatile_0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_volatile_0(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_volatile_0:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -468,7 +468,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(ptr addrspace(1) %out) #0
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load volatile float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
@@ -480,7 +480,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(ptr addrspace(1) %out) #0
ret void
}
-define amdgpu_kernel void @simple_read2_f32_volatile_1(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f32_volatile_1(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f32_volatile_1:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
@@ -506,7 +506,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(ptr addrspace(1) %out) #0
; GFX9-NEXT: v_add_f32_e32 v1, v1, v2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
@@ -519,7 +519,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(ptr addrspace(1) %out) #0
}
; Can't fold since not correctly aligned.
-define amdgpu_kernel void @unaligned_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @unaligned_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: unaligned_read2_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -600,7 +600,7 @@ define amdgpu_kernel void @unaligned_read2_f32(ptr addrspace(1) %out, ptr addrsp
; GFX9-UNALIGNED-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-UNALIGNED-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-UNALIGNED-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 1
%add.x = add nsw i32 %x.i, 8
@@ -612,7 +612,7 @@ define amdgpu_kernel void @unaligned_read2_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @unaligned_offset_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @unaligned_offset_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: unaligned_offset_read2_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -693,7 +693,7 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(ptr addrspace(1) %out, ptr
; GFX9-UNALIGNED-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-UNALIGNED-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-UNALIGNED-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%base = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %x.i
%addr0.i8 = getelementptr inbounds i8, ptr addrspace(3) %base, i32 5
%val0 = load float, ptr addrspace(3) %addr0.i8, align 1
@@ -705,7 +705,7 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @misaligned_2_simple_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @misaligned_2_simple_read2_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: misaligned_2_simple_read2_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -763,7 +763,7 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(ptr addrspace(1) %out,
; GFX9-UNALIGNED-NEXT: v_add_f32_e32 v0, v0, v1
; GFX9-UNALIGNED-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-UNALIGNED-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 2
%add.x = add nsw i32 %x.i, 8
@@ -775,7 +775,7 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @simple_read2_f64(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f64(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f64:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0
@@ -799,7 +799,7 @@ define amdgpu_kernel void @simple_read2_f64(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x double], ptr addrspace(3) @lds.f64, i32 0, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
@@ -811,7 +811,7 @@ define amdgpu_kernel void @simple_read2_f64(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @simple_read2_f64_max_offset(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f64_max_offset(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f64_max_offset:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0
@@ -835,7 +835,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(ptr addrspace(1) %out) #0
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x double], ptr addrspace(3) @lds.f64, i32 0, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 255
@@ -847,7 +847,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(ptr addrspace(1) %out) #0
ret void
}
-define amdgpu_kernel void @simple_read2_f64_too_far(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @simple_read2_f64_too_far(ptr addrspace(1) %out) nounwind {
; CI-LABEL: simple_read2_f64_too_far:
; CI: ; %bb.0:
; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
@@ -873,7 +873,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(ptr addrspace(1) %out) #0 {
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds [512 x double], ptr addrspace(3) @lds.f64, i32 0, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 257
@@ -886,7 +886,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(ptr addrspace(1) %out) #0 {
}
; Alignment only 4
-define amdgpu_kernel void @misaligned_read2_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @misaligned_read2_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: misaligned_read2_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -918,7 +918,7 @@ define amdgpu_kernel void @misaligned_read2_f64(ptr addrspace(1) %out, ptr addrs
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
@@ -1065,7 +1065,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(ptr addrspac
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
-define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb) #0 {
+define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb) nounwind {
; CI-LABEL: sgemm_inner_loop_read2_sequence:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
@@ -1126,8 +1126,8 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(ptr addrspace(1) %C,
; GFX9-NEXT: v_add_f32_e32 v0, v0, v9
; GFX9-NEXT: global_store_dword v10, v0, s[0:1]
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
- %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
+ %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+ %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
%arrayidx44 = getelementptr inbounds [264 x float], ptr addrspace(3) @sgemm.lA, i32 0, i32 %x.i
%tmp16 = load float, ptr addrspace(3) %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
@@ -1169,7 +1169,7 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(ptr addrspace(1) %C,
ret void
}
-define amdgpu_kernel void @misaligned_read2_v2i32(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @misaligned_read2_v2i32(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
; CI-LABEL: misaligned_read2_v2i32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @misaligned_read2_v2i32(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @misaligned_read2_i64(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @misaligned_read2_i64(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
; CI-LABEL: misaligned_read2_i64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0x2
@@ -1295,11 +1295,11 @@ define amdgpu_kernel void @ds_read_diff_base_interleaving(
ptr addrspace(3) %arg1,
ptr addrspace(3) %arg2,
ptr addrspace(3) %arg3,
- ptr addrspace(3) %arg4) #1 {
+ ptr addrspace(3) %arg4) nounwind readnone speculatable {
bb:
%tmp = getelementptr float, ptr addrspace(1) %arg, i64 10
- %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() #2
- %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+ %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.x() convergent nounwind
+ %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() convergent nounwind
%tmp7 = getelementptr [4 x [4 x float]], ptr addrspace(3) %arg1, i32 0, i32 %tmp6, i32 0
%tmp8 = getelementptr [4 x [4 x float]], ptr addrspace(3) %arg2, i32 0, i32 0, i32 %tmp5
%tmp9 = getelementptr [4 x [4 x float]], ptr addrspace(3) %arg3, i32 0, i32 %tmp6, i32 0
@@ -1525,21 +1525,16 @@ entry:
ret void
}
-declare void @void_func_void() #3
+declare void @void_func_void() nounwind noinline
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
-declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { convergent nounwind }
-attributes #3 = { nounwind noinline }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
index 3aff5ccff4280a..39036ce03bf0d8 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -12,8 +12,8 @@
; CI: s_waitcnt lgkmcnt(0)
; CI: buffer_store_dwordx2 [[RESULT]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v2f32_superreg_align4(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v2f32_superreg_align4(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <2 x float>], ptr addrspace(3) @lds.v2, i32 0, i32 %x.i
%val0 = load <2 x float>, ptr addrspace(3) %arrayidx0, align 4
%out.gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -26,8 +26,8 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg_align4(ptr addrspace(1) %
; CI: s_waitcnt lgkmcnt(0)
; CI: buffer_store_dwordx2 [[RESULT]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v2f32_superreg(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v2f32_superreg(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <2 x float>], ptr addrspace(3) @lds.v2, i32 0, i32 %x.i
%val0 = load <2 x float>, ptr addrspace(3) %arrayidx0
%out.gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -43,8 +43,8 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg(ptr addrspace(1) %out) #0
; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD0]], v[[ADD1]]
; CI: buffer_store_dword v[[ADD2]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v4f32_superreg_align4(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v4f32_superreg_align4(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], ptr addrspace(3) @lds.v4, i32 0, i32 %x.i
%val0 = load <4 x float>, ptr addrspace(3) %arrayidx0, align 4
%elt0 = extractelement <4 x float> %val0, i32 0
@@ -68,8 +68,8 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg_align4(ptr addrspace(1) %
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[ADD0]], v[[REG_Y]]
; CI: buffer_store_dword v[[ADD1]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v3f32_superreg_align4(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v3f32_superreg_align4(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <3 x float>], ptr addrspace(3) @lds.v3, i32 0, i32 %x.i
%val0 = load <3 x float>, ptr addrspace(3) %arrayidx0, align 4
%elt0 = extractelement <3 x float> %val0, i32 0
@@ -88,8 +88,8 @@ define amdgpu_kernel void @simple_read2_v3f32_superreg_align4(ptr addrspace(1) %
; CI: ds_read2_b64 [[REG_ZW:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
; CI: buffer_store_dwordx4 [[REG_ZW]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v4f32_superreg_align8(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v4f32_superreg_align8(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], ptr addrspace(3) @lds.v4, i32 0, i32 %x.i
%val0 = load <4 x float>, ptr addrspace(3) %arrayidx0, align 8
%out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -101,8 +101,8 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg_align8(ptr addrspace(1) %
; CI-DAG: ds_read2_b64 [[REG_ZW:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
; CI: buffer_store_dwordx4 [[REG_ZW]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v4f32_superreg(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v4f32_superreg(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <4 x float>], ptr addrspace(3) @lds.v4, i32 0, i32 %x.i
%val0 = load <4 x float>, ptr addrspace(3) %arrayidx0
%out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -117,8 +117,8 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg(ptr addrspace(1) %out) #0
; CI-DAG: buffer_store_dwordx4 [[VEC_HI]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; CI-DAG: buffer_store_dwordx4 [[VEC_LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64{{$}}
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v8f32_superreg(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v8f32_superreg(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <8 x float>], ptr addrspace(3) @lds.v8, i32 0, i32 %x.i
%val0 = load <8 x float>, ptr addrspace(3) %arrayidx0
%out.gep = getelementptr inbounds <8 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -138,8 +138,8 @@ define amdgpu_kernel void @simple_read2_v8f32_superreg(ptr addrspace(1) %out) #0
; CI-DAG: buffer_store_dwordx4 [[VEC8_11]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
; CI-DAG: buffer_store_dwordx4 [[VEC12_15]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v16f32_superreg(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v16f32_superreg(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x <16 x float>], ptr addrspace(3) @lds.v16, i32 0, i32 %x.i
%val0 = load <16 x float>, ptr addrspace(3) %arrayidx0
%out.gep = getelementptr inbounds <16 x float>, ptr addrspace(1) %out, i32 %x.i
@@ -153,8 +153,8 @@ define amdgpu_kernel void @simple_read2_v16f32_superreg(ptr addrspace(1) %out) #
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
; CI: buffer_store_dwordx2 v[[[REG_ELT0]]:[[REG_ELT1]]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%arrayidx1 = getelementptr inbounds float, ptr addrspace(3) %arrayidx0, i32 1
@@ -176,8 +176,8 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(ptr a
; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}}
; CI: buffer_store_dwordx4 v[[[REG_ELT0]]:[[REG_ELT3]]]
; CI: s_endpgm
-define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%arrayidx1 = getelementptr inbounds float, ptr addrspace(3) %arrayidx0, i32 1
%arrayidx2 = getelementptr inbounds float, ptr addrspace(3) %arrayidx0, i32 2
@@ -199,11 +199,7 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(ptr a
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
index d15183e57c938b..ff189a723f5902 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -14,8 +14,8 @@
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f32_0_1(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f32_0_1(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
@@ -36,8 +36,8 @@ define amdgpu_kernel void @simple_read2st64_f32_0_1(ptr addrspace(1) %out) #0 {
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f32_1_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f32_1_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -59,8 +59,8 @@ define amdgpu_kernel void @simple_read2st64_f32_1_2(ptr addrspace(1) %out, ptr a
; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]]
; CI: buffer_store_dword [[RESULT]]
; GFX9: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f32_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f32_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -82,8 +82,8 @@ define amdgpu_kernel void @simple_read2st64_f32_max_offset(ptr addrspace(1) %out
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}}
; GCN: s_endpgm
-define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds float, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -102,8 +102,8 @@ define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(ptr addrspace(1)
; GCN-NOT: ds_read2st64_b32
; GCN: s_endpgm
-define amdgpu_kernel void @odd_invalid_read2st64_f32_0(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @odd_invalid_read2st64_f32_0(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 63
@@ -121,8 +121,8 @@ define amdgpu_kernel void @odd_invalid_read2st64_f32_0(ptr addrspace(1) %out) #0
; GCN-NOT: ds_read2st64_b32
; GCN: s_endpgm
-define amdgpu_kernel void @odd_invalid_read2st64_f32_1(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @odd_invalid_read2st64_f32_1(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -144,8 +144,8 @@ define amdgpu_kernel void @odd_invalid_read2st64_f32_1(ptr addrspace(1) %out) #0
; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v[[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]]
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f64_0_1(ptr addrspace(1) %out) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f64_0_1(ptr addrspace(1) %out) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds [512 x double], ptr addrspace(3) @lds.f64, i32 0, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 64
@@ -167,8 +167,8 @@ define amdgpu_kernel void @simple_read2st64_f64_0_1(ptr addrspace(1) %out) #0 {
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f64_1_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f64_1_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
@@ -190,8 +190,8 @@ define amdgpu_kernel void @simple_read2st64_f64_1_2(ptr addrspace(1) %out, ptr a
; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; GCN: s_endpgm
-define amdgpu_kernel void @misaligned_read2st64_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @misaligned_read2st64_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
@@ -214,8 +214,8 @@ define amdgpu_kernel void @misaligned_read2st64_f64(ptr addrspace(1) %out, ptr a
; CI: buffer_store_dwordx2 [[RESULT]]
; GFX9: global_store_dwordx2 v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @simple_read2st64_f64_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f64_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 256
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
@@ -237,8 +237,8 @@ define amdgpu_kernel void @simple_read2st64_f64_max_offset(ptr addrspace(1) %out
; GCN-DAG: v_add_{{i|u}}32_e32 [[BIGADD:v[0-9]+]], {{(vcc, )?}}0x10000, {{v[0-9]+}}
; GCN: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; GCN: s_endpgm
-define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
@@ -257,8 +257,8 @@ define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(ptr addrspace(1)
; GCN-NOT: ds_read2st64_b64
; GCN: s_endpgm
-define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%add.x.0 = add nsw i32 %x.i, 64
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %add.x.0
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
@@ -281,8 +281,8 @@ define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(ptr addrspace(1) %ou
; GCN-NOT: ds_read2st_b64
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; GCN: s_endpgm
-define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
%val0 = load double, ptr addrspace(3) %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
@@ -294,8 +294,5 @@ define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(ptr addrspace(1
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index 06908d21e53556..baf6ac7229fb29 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -6,7 +6,7 @@
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
-define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_one_val_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -30,7 +30,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr ad
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v1 offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%val = load float, ptr addrspace(1) %in.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
@@ -41,7 +41,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr ad
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -69,7 +69,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr ad
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile float, ptr addrspace(1) %in.gep.0, align 4
@@ -82,7 +82,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(ptr addrspace(1) %C, ptr ad
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; CI-LABEL: simple_write2_two_val_f32_volatile_0:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
@@ -115,7 +115,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1)
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2 offset:32
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %x.i
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %x.i
%val0 = load volatile float, ptr addrspace(1) %in0.gep, align 4
@@ -128,7 +128,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; CI-LABEL: simple_write2_two_val_f32_volatile_1:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
@@ -161,7 +161,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1)
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2 offset:32
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %x.i
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %x.i
%val0 = load volatile float, ptr addrspace(1) %in0.gep, align 4
@@ -179,7 +179,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(ptr addrspace(1)
; This should be an s_mov_b32. The v_mov_b32 gets introduced by an
; early legalization of the constant bus constraint on the v_lshl_add_u32,
; and then SIFoldOperands folds in an unlucky order.
-define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_subreg2_mixed_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -211,7 +211,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v3 offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep.0 = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr <2 x float>, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile <2 x float>, ptr addrspace(1) %in.gep.0, align 8
@@ -226,7 +226,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(ptr addrspace
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_subreg2_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -252,7 +252,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %x.i
%val = load <2 x float>, ptr addrspace(1) %in.gep, align 8
%val0 = extractelement <2 x float> %val, i32 0
@@ -265,7 +265,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(ptr addrspace(1) %C
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_subreg4_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -291,7 +291,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v4 offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr <4 x float>, ptr addrspace(1) %in, i32 %x.i
%val = load <4 x float>, ptr addrspace(1) %in.gep, align 16
%val0 = extractelement <4 x float> %val, i32 0
@@ -304,7 +304,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(ptr addrspace(1) %C
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_max_offset_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -332,7 +332,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1)
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:255
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile float, ptr addrspace(1) %in.gep.0, align 4
@@ -345,7 +345,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; CI-LABEL: simple_write2_two_val_too_far_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
@@ -378,7 +378,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v2 offset:1028
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %x.i
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %x.i
%val0 = load float, ptr addrspace(1) %in0.gep, align 4
@@ -391,7 +391,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(ptr addrspace(1) %C
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; CI-LABEL: simple_write2_two_val_f32_x2:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
@@ -422,7 +422,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset1:8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
; GFX9-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %tid.x
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %tid.x
%val0 = load float, ptr addrspace(1) %in0.gep, align 4
@@ -447,7 +447,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(ptr addrspace(1) %C, ptr
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; CI-LABEL: simple_write2_two_val_f32_x2_nonzero_base:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2
@@ -478,7 +478,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspa
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:3 offset1:8
; GFX9-NEXT: ds_write2_b32 v0, v1, v2 offset0:11 offset1:27
; GFX9-NEXT: s_endpgm
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %tid.x
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %tid.x
%val0 = load float, ptr addrspace(1) %in0.gep, align 4
@@ -503,7 +503,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(ptr addrspa
ret void
}
-define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1, <2 x ptr addrspace(3)> %lds.ptr) #0 {
+define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in0, ptr addrspace(1) %in1, <2 x ptr addrspace(3)> %lds.ptr) nounwind {
; CI-LABEL: write2_ptr_subreg_arg_two_val_f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2
@@ -542,7 +542,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write_b32 v3, v2 offset:32
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in0.gep = getelementptr float, ptr addrspace(1) %in0, i32 %x.i
%in1.gep = getelementptr float, ptr addrspace(1) %in1, i32 %x.i
%val0 = load float, ptr addrspace(1) %in0.gep, align 4
@@ -563,7 +563,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(ptr addrspace(1) %C
ret void
}
-define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_one_val_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -587,7 +587,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr ad
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%val = load double, ptr addrspace(1) %in.gep, align 8
%arrayidx0 = getelementptr inbounds [512 x double], ptr addrspace(3) @lds.f64, i32 0, i32 %x.i
@@ -598,7 +598,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(ptr addrspace(1) %C, ptr ad
ret void
}
-define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: misaligned_simple_write2_one_val_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
@@ -628,7 +628,7 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1)
; GFX9-NEXT: ds_write2_b32 v2, v0, v1 offset1:1
; GFX9-NEXT: ds_write2_b32 v2, v0, v1 offset0:14 offset1:15
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%val = load double, ptr addrspace(1) %in.gep, align 8
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
@@ -639,7 +639,7 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) nounwind {
; CI-LABEL: unaligned_offset_simple_write2_one_val_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2
@@ -712,7 +712,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrsp
; GFX9-UNALIGNED-NEXT: ds_write_b64 v2, v[0:1] offset:5
; GFX9-UNALIGNED-NEXT: ds_write_b64 v2, v[0:1] offset:9
; GFX9-UNALIGNED-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%val = load double, ptr addrspace(1) %in.gep, align 8
%base = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
@@ -723,7 +723,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(ptr addrsp
ret void
}
-define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_two_val_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2
@@ -751,7 +751,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr ad
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write2_b64 v4, v[0:1], v[2:3] offset1:8
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr double, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile double, ptr addrspace(1) %in.gep.0, align 8
@@ -865,7 +865,7 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
-define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb, ptr addrspace(1) %in) nounwind {
; CI-LABEL: write2_sgemm_sequence:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4
@@ -908,8 +908,8 @@ define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda,
; GFX9-NEXT: ds_write2_b32 v0, v3, v4 offset0:32 offset1:33
; GFX9-NEXT: ds_write2_b32 v0, v3, v4 offset0:64 offset1:65
; GFX9-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
- %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
+ %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+ %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
%val = load float, ptr addrspace(1) %in
%arrayidx44 = getelementptr inbounds [264 x float], ptr addrspace(3) @sgemm.lA, i32 0, i32 %x.i
store float %val, ptr addrspace(3) %arrayidx44, align 4
@@ -942,7 +942,7 @@ define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda,
ret void
}
-define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: simple_write2_v4f32_superreg_align4:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2
@@ -992,7 +992,7 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3)
; GFX9-UNALIGNED-NEXT: ds_write2_b32 v0, v1, v2 offset0:2 offset1:3
; GFX9-UNALIGNED-NEXT: ds_write2_b32 v0, v3, v4 offset1:1
; GFX9-UNALIGNED-NEXT: s_endpgm
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%in.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %in
%val0 = load <4 x float>, ptr addrspace(1) %in.gep, align 4
%out.gep = getelementptr inbounds <4 x float>, ptr addrspace(3) %out, i32 %x.i
@@ -1048,11 +1048,7 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
index 26ccc32e1bf678..0c8b6a8313a04e 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
@@ -11,8 +11,8 @@
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
; GCN: s_endpgm
-define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%val = load float, ptr addrspace(1) %in.gep, align 4
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
@@ -37,8 +37,8 @@ define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(ptr addrspace(1) %C
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5
; GCN: s_endpgm
-define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(ptr addrspace(1) %C, ptr addrspace(1) %in) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile float, ptr addrspace(1) %in.gep.0, align 4
@@ -66,8 +66,8 @@ define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(ptr addrspace(1) %C
; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]]
; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; GCN: s_endpgm
-define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile float, ptr addrspace(1) %in.gep.0, align 4
@@ -94,8 +94,8 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(ptr addrspac
; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]]
; GCN: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127
; GCN: s_endpgm
-define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%in.gep.1 = getelementptr double, ptr addrspace(1) %in.gep.0, i32 1
%val0 = load volatile double, ptr addrspace(1) %in.gep.0, align 8
@@ -116,8 +116,8 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(ptr addrspac
; GCN-NOT: ds_write2st64_b64
; GCN: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
; GCN: s_endpgm
-define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) #0 {
- %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(ptr addrspace(1) %C, ptr addrspace(1) %in, ptr addrspace(3) %lds) nounwind {
+ %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep = getelementptr double, ptr addrspace(1) %in, i32 %x.i
%val = load double, ptr addrspace(1) %in.gep, align 8
%arrayidx0 = getelementptr inbounds double, ptr addrspace(3) %lds, i32 %x.i
@@ -128,9 +128,5 @@ define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(ptr addrsp
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll b/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
index 6626945f49c8c9..d2365df8a2071d 100644
--- a/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; This is a slightly modified IR from real case to make it concise.
-define amdgpu_ps void @_amdgpu_ps_main(i32 inreg %PrimMask, <2 x float> %InterpCenter) #0 {
+define amdgpu_ps void @_amdgpu_ps_main(i32 inreg %PrimMask, <2 x float> %InterpCenter) nounwind {
; GCN-LABEL: _amdgpu_ps_main:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: s_mov_b32 s1, exec_lo
@@ -88,16 +88,10 @@ define amdgpu_ps void @_amdgpu_ps_main(i32 inreg %PrimMask, <2 x float> %InterpC
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #2
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32 immarg) #3
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #4
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #1
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #1
-declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind readnone willreturn }
-attributes #3 = { convergent nounwind readnone willreturn }
-attributes #4 = { inaccessiblememonly nounwind willreturn writeonly }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone willreturn
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone willreturn
+declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32 immarg) convergent nounwind readnone willreturn
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) inaccessiblememonly nounwind willreturn writeonly
+declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 05558c555c581e..db3b964e33dd70 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -13,7 +13,7 @@ define internal void @indirect() {
ret void
}
-define amdgpu_kernel void @test_simple_indirect_call() #0 {
+define amdgpu_kernel void @test_simple_indirect_call() "amdgpu-no-dispatch-id" {
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -37,8 +37,6 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
ret void
}
-attributes #0 = { "amdgpu-no-dispatch-id" }
-
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index 16f16f56248cbf..39343756994e2d 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -4,7 +4,7 @@
; Don't crash.
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nocallback nofree nosync nounwind readnone speculatable willreturn
declare ptr @__kmpc_alloc_shared()
@@ -107,8 +107,6 @@ define weak_odr void @test(i32 %0) !dbg !34 {
ret void
}
-attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
-
!llvm.dbg.cu = !{!0, !25, !26}
!llvm.module.flags = !{!27, !28, !29, !30, !31, !32, !44}
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
index 95577b44db7645..d71e3829275a7b 100644
--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@@ -12,7 +12,7 @@
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], [[ADD]], [[VAL]], vcc
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -34,7 +34,7 @@ endif:
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[VAL]], [[VAL]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @test_vccnz_ifcvt_diamond(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_diamond(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -60,14 +60,14 @@ endif:
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc
; GCN: s_mov_b64 vcc, [[CMP]]
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(ptr addrspace(1) %out, ptr addrspace(1) %in, float %k) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_vcc_clobber(ptr addrspace(1) %out, ptr addrspace(1) %in, float %k) nounwind {
entry:
%v = load i32, ptr addrspace(1) %in
%cc = fcmp oeq float %k, 1.000000e+00
br i1 %cc, label %if, label %endif
if:
- call void asm "; clobber $0", "~{vcc}"() #0
+ call void asm "; clobber $0", "~{vcc}"() nounwind
%u = add i32 %v, %v
br label %endif
@@ -89,7 +89,7 @@ endif:
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN: v_cndmask_b32_e32
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_max_cheap(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_max_cheap(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -130,7 +130,7 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_min_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_min_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -164,7 +164,7 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_expensive(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -189,7 +189,7 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(4) %in, float %cnd) #0 {
+define amdgpu_kernel void @test_vccnz_sgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(4) %in, float %cnd) nounwind {
entry:
%v = load i32, ptr addrspace(4) %in
%cc = fcmp oeq float %cnd, 1.000000e+00
@@ -208,7 +208,7 @@ endif:
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_constant_load:
; GCN: v_cndmask_b32
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_constant_load(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
entry:
%v = load float, ptr addrspace(4) %in
%cc = fcmp oeq float %v, 1.000000e+00
@@ -229,7 +229,7 @@ endif:
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_argload:
; GCN: v_cndmask_b32
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle_argload(ptr addrspace(1) %out, float %v) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle_argload(ptr addrspace(1) %out, float %v) nounwind {
entry:
%cc = fcmp oeq float %v, 1.000000e+00
br i1 %cc, label %if, label %endif
@@ -250,7 +250,7 @@ endif:
; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[VAL]], [[ADD]]
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(ptr addrspace(4) %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(ptr addrspace(4) %in, i32 %cond) nounwind {
entry:
%v = load i32, ptr addrspace(4) %in
%cc = icmp eq i32 %cond, 1
@@ -262,7 +262,7 @@ if:
endif:
%r = phi i32 [ %v, %entry ], [ %u, %if ]
- call void asm sideeffect "; reg use $0", "s"(i32 %r) #0
+ call void asm sideeffect "; reg use $0", "s"(i32 %r) nounwind
ret void
}
@@ -276,7 +276,7 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_scc1_vgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_vgpr_ifcvt_triangle(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) nounwind {
entry:
%v = load float, ptr addrspace(1) %in
%cc = icmp eq i32 %cond, 1
@@ -297,7 +297,7 @@ endif:
; GCN: s_addc_u32
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(ptr addrspace(4) %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle64(ptr addrspace(4) %in, i32 %cond) nounwind {
entry:
%v = load i64, ptr addrspace(4) %in
%cc = icmp eq i32 %cond, 1
@@ -309,7 +309,7 @@ if:
endif:
%r = phi i64 [ %v, %entry ], [ %u, %if ]
- call void asm sideeffect "; reg use $0", "s"(i64 %r) #0
+ call void asm sideeffect "; reg use $0", "s"(i64 %r) nounwind
ret void
}
@@ -322,7 +322,7 @@ endif:
; GCN-NEXT: s_cselect_b32 s
; GCN-NEXT: s_cselect_b32 s
; GCN-NEXT: s_cselect_b32 s
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(ptr addrspace(4) %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle96(ptr addrspace(4) %in, i32 %cond) nounwind {
entry:
%v = load <3 x i32>, ptr addrspace(4) %in
%cc = icmp eq i32 %cond, 1
@@ -335,7 +335,7 @@ if:
endif:
%r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
%r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
+ call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) nounwind
ret void
}
@@ -347,7 +347,7 @@ endif:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cselect_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(ptr addrspace(4) %in, i32 %cond) #0 {
+define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle128(ptr addrspace(4) %in, i32 %cond) nounwind {
entry:
%v = load <4 x i32>, ptr addrspace(4) %in
%cc = icmp eq i32 %cond, 1
@@ -359,7 +359,7 @@ if:
endif:
%r = phi <4 x i32> [ %v, %entry ], [ %u, %if ]
- call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r) #0
+ call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r) nounwind
ret void
}
@@ -412,7 +412,7 @@ done:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle256(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle256(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) nounwind {
entry:
%v = load <8 x i32>, ptr addrspace(1) %in
%cc = fcmp oeq float %cnd, 1.000000e+00
@@ -437,7 +437,7 @@ endif:
; GCN: [[ENDIF]]:
; GCN: buffer_store_dword
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle512(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle512(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cnd) nounwind {
entry:
%v = load <16 x i32>, ptr addrspace(1) %in
%cc = fcmp oeq float %cnd, 1.000000e+00
@@ -452,5 +452,3 @@ endif:
store <16 x i32> %r, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
index 99ea18c37050bc..f59f84a8554bfe 100644
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -23,16 +23,13 @@
ret void
}
- define amdgpu_ps void @early_term_no_export() #0 {
+ define amdgpu_ps void @early_term_no_export() "amdgpu-color-export"="0" "amdgpu-depth-export"="0" {
ret void
}
- define amdgpu_ps void @early_term_depth_only() #1 {
+ define amdgpu_ps void @early_term_depth_only() "amdgpu-color-export"="0" "amdgpu-depth-export"="1" {
ret void
}
-
- attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
- attributes #1 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="1" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/elf.ll b/llvm/test/CodeGen/AMDGPU/elf.ll
index f51d9fc5125ba6..d3a3031d9cc9b6 100644
--- a/llvm/test/CodeGen/AMDGPU/elf.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf.ll
@@ -24,13 +24,11 @@
; TONGA-NEXT: .long 704
; CONFIG: .p2align 8
; CONFIG: test:
-define amdgpu_ps void @test(i32 %p) #0 {
+define amdgpu_ps void @test(i32 %p) nounwind {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %r, float %r, float %r, i1 true, i1 false)
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 655c5cd184a1ed..a606bd699c3b80 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -4,7 +4,7 @@
; CHECK-LABEL: {{^}}else_no_execfix:
; CHECK: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]], [[DST]]
-define amdgpu_ps float @else_no_execfix(i32 %z, float %v) #0 {
+define amdgpu_ps float @else_no_execfix(i32 %z, float %v) nounwind {
main_body:
%cc = icmp sgt i32 %z, 5
br i1 %cc, label %if, label %else
@@ -31,7 +31,7 @@ end:
; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
; CHECK-NEXT: s_cbranch_execz
-define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) #0 {
+define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) nounwind {
main_body:
%cc = icmp sgt i32 %z, 5
br i1 %cc, label %if, label %else
@@ -52,9 +52,5 @@ end:
ret void
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind writeonly }
-attributes #2 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind writeonly
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/empty-function.ll b/llvm/test/CodeGen/AMDGPU/empty-function.ll
index dba51225b6989c..1847f8de3ead57 100644
--- a/llvm/test/CodeGen/AMDGPU/empty-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/empty-function.ll
@@ -7,15 +7,13 @@
; SI-LABEL: {{^}}empty_function_ret:
; SI: s_endpgm
; SI: codeLenInByte = 4
-define amdgpu_kernel void @empty_function_ret() #0 {
+define amdgpu_kernel void @empty_function_ret() nounwind {
ret void
}
; SI: .text
; SI-LABEL: {{^}}empty_function_unreachable:
; SI: codeLenInByte = 0
-define amdgpu_kernel void @empty_function_unreachable() #0 {
+define amdgpu_kernel void @empty_function_unreachable() nounwind {
unreachable
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
index 3ef0bb87f6e650..ea9758c869deb3 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}fneg_fsub_f32_fmf:
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN-FMF-NOT: xor
-define amdgpu_kernel void @fneg_fsub_f32_fmf(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fneg_fsub_f32_fmf(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%add = add i32 %tid, 1
%gep = getelementptr float, ptr addrspace(1) %in, i32 %tid
@@ -23,7 +23,7 @@ define amdgpu_kernel void @fneg_fsub_f32_fmf(ptr addrspace(1) %out, ptr addrspac
; GCN-LABEL: {{^}}fneg_fsub_f32_safe:
; GCN: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @fneg_fsub_f32_safe(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fneg_fsub_f32_safe(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%add = add i32 %tid, 1
%gep = getelementptr float, ptr addrspace(1) %in, i32 %tid
@@ -35,5 +35,3 @@ define amdgpu_kernel void @fneg_fsub_f32_safe(ptr addrspace(1) %out, ptr addrspa
store float %neg.result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll b/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
index 00c5e0abf65062..64e56470bf8903 100644
--- a/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
@@ -14,7 +14,7 @@
; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
entry:
- %cond = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %cond = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %if, label %loop
@@ -34,6 +34,4 @@ done:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
index 9391b50c04a5f7..596b3aa1c0141d 100644
--- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -57,7 +57,7 @@ entry:
ret void
}
-define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
+define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, ptr addrspace(1), i8 }> %arg) "enqueued-block" {
entry:
%.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 2
%.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 3
@@ -67,7 +67,7 @@ entry:
declare i32 @__enqueue_kernel_basic(ptr addrspace(1), i32, ptr addrspace(5), ptr, ptr) local_unnamed_addr
-define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg) #0 {
+define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg) "enqueued-block" {
entry:
%.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 2
%.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), ptr addrspace(1), i64, i8 }> %arg, 3
@@ -80,7 +80,7 @@ entry:
@kernel_address_user = global [1 x ptr] [ ptr @block_has_used_kernel_address ]
-define internal amdgpu_kernel void @block_has_used_kernel_address(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
+define internal amdgpu_kernel void @block_has_used_kernel_address(<{ i32, i32, ptr addrspace(1), i8 }> %arg) "enqueued-block" {
entry:
%.fca.3.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 2
%.fca.4.extract = extractvalue <{ i32, i32, ptr addrspace(1), i8 }> %arg, 3
@@ -93,15 +93,13 @@ define amdgpu_kernel void @user_of_kernel_address(ptr addrspace(1) %arg) {
ret void
}
-define internal amdgpu_kernel void @0(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
+define internal amdgpu_kernel void @0(<{ i32, i32, ptr addrspace(1), i8 }> %arg) "enqueued-block" {
ret void
}
-define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg) #0 {
+define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg) "enqueued-block" {
ret void
}
-
-attributes #0 = { "enqueued-block" }
;.
; CHECK: @[[KERNEL_ADDRESS_USER:[a-zA-Z0-9_$"\\.-]+]] = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @block_has_used_kernel_address.runtime_handle to ptr)]
; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) externally_initialized constant [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/llvm/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
index 7fbd6ebe84d771..3040aacce4b4c0 100644
--- a/llvm/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
@@ -1,7 +1,7 @@
; RUN: not llc -mtriple=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; ERROR: error: <unknown>:0:0: scalar registers (106) exceeds limit (104) in function 'use_too_many_sgprs_tahiti'
-define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 {
+define amdgpu_kernel void @use_too_many_sgprs_tahiti() nounwind "target-cpu"="tahiti" {
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -20,7 +20,7 @@ define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 {
}
; ERROR: error: <unknown>:0:0: scalar registers (106) exceeds limit (104) in function 'use_too_many_sgprs_bonaire'
-define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
+define amdgpu_kernel void @use_too_many_sgprs_bonaire() nounwind "target-cpu"="bonaire" {
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -39,7 +39,7 @@ define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
}
; ERROR: error: <unknown>:0:0: scalar registers (108) exceeds limit (104) in function 'use_too_many_sgprs_bonaire_flat_scr'
-define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
+define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() nounwind "target-cpu"="bonaire" {
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -59,7 +59,7 @@ define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
}
; ERROR: error: <unknown>:0:0: scalar registers (98) exceeds limit (96) in function 'use_too_many_sgprs_iceland'
-define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 {
+define amdgpu_kernel void @use_too_many_sgprs_iceland() nounwind "target-cpu"="iceland" {
call void asm sideeffect "", "~{vcc}" ()
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
@@ -77,7 +77,7 @@ define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 {
}
; ERROR: error: <unknown>:0:0: addressable scalar registers (103) exceeds limit (102) in function 'use_too_many_sgprs_fiji'
-define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 {
+define amdgpu_kernel void @use_too_many_sgprs_fiji() nounwind "target-cpu"="fiji" {
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -95,8 +95,3 @@ define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 {
call void asm sideeffect "", "~{s102}" ()
ret void
}
-
-attributes #0 = { nounwind "target-cpu"="tahiti" }
-attributes #1 = { nounwind "target-cpu"="bonaire" }
-attributes #2 = { nounwind "target-cpu"="iceland" }
-attributes #3 = { nounwind "target-cpu"="fiji" }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
index 2e9bfc891629a8..d4ef0bac91b3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
@@ -4,7 +4,7 @@
; GCN: v_and_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
define amdgpu_kernel void @and_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
%a = load i16, ptr addrspace(1) %in
%b = load i16, ptr addrspace(1) %ptr
@@ -19,7 +19,7 @@ define amdgpu_kernel void @and_zext(ptr addrspace(1) %out, ptr addrspace(1) %in)
; GCN: v_or_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
define amdgpu_kernel void @or_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
%a = load i16, ptr addrspace(1) %in
%b = load i16, ptr addrspace(1) %ptr
@@ -34,7 +34,7 @@ define amdgpu_kernel void @or_zext(ptr addrspace(1) %out, ptr addrspace(1) %in)
; GCN: v_xor_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
define amdgpu_kernel void @xor_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
%a = load i16, ptr addrspace(1) %in
%b = load i16, ptr addrspace(1) %ptr
@@ -45,6 +45,4 @@ define amdgpu_kernel void @xor_zext(ptr addrspace(1) %out, ptr addrspace(1) %in)
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
index 70011e56d016e0..df6fbd6c603554 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
-define amdgpu_kernel void @extract_vector_elt_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
; SI-LABEL: extract_vector_elt_v2f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,7 +59,7 @@ define amdgpu_kernel void @extract_vector_elt_v2f16(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %idx) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %idx) nounwind {
; SI-LABEL: extract_vector_elt_v2f16_dynamic_sgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -116,7 +116,7 @@ define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_sgpr(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(1) %idx.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(1) %idx.ptr) nounwind {
; SI-LABEL: extract_vector_elt_v2f16_dynamic_vgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -190,7 +190,7 @@ define amdgpu_kernel void @extract_vector_elt_v2f16_dynamic_vgpr(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @extract_vector_elt_v3f16(ptr addrspace(1) %out, <3 x half> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v3f16(ptr addrspace(1) %out, <3 x half> %foo) nounwind {
; SI-LABEL: extract_vector_elt_v3f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -244,7 +244,7 @@ define amdgpu_kernel void @extract_vector_elt_v3f16(ptr addrspace(1) %out, <3 x
}
; FIXME: Why sometimes vector shift?
-define amdgpu_kernel void @dynamic_extract_vector_elt_v3f16(ptr addrspace(1) %out, <3 x half> %foo, i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v3f16(ptr addrspace(1) %out, <3 x half> %foo, i32 %idx) nounwind {
; SI-LABEL: dynamic_extract_vector_elt_v3f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -297,7 +297,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v3f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_extractelement_v4f16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_extractelement_v4f16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_extractelement_v4f16_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -346,7 +346,7 @@ define amdgpu_kernel void @v_extractelement_v4f16_2(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -356,7 +356,7 @@ define amdgpu_kernel void @v_extractelement_v4f16_2(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_insertelement_v4f16_dynamic_vgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -420,7 +420,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(ptr addrspace(1) %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -431,7 +431,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_vgpr(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @reduce_load_vector_v8f16_extract_01(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @reduce_load_vector_v8f16_extract_01(ptr addrspace(4) %ptr) nounwind {
; SI-LABEL: reduce_load_vector_v8f16_extract_01:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -492,7 +492,7 @@ define amdgpu_kernel void @reduce_load_vector_v8f16_extract_01(ptr addrspace(4)
ret void
}
-define amdgpu_kernel void @reduce_load_vector_v8f16_extract_23(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @reduce_load_vector_v8f16_extract_23(ptr addrspace(4) %ptr) nounwind {
; SI-LABEL: reduce_load_vector_v8f16_extract_23:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -553,7 +553,7 @@ define amdgpu_kernel void @reduce_load_vector_v8f16_extract_23(ptr addrspace(4)
ret void
}
-define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) #0 {
+define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) nounwind {
; SI-LABEL: v_extractelement_v8f16_dynamic_sgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -691,7 +691,7 @@ define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -701,7 +701,7 @@ define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_extractelement_v16f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) #0 {
+define amdgpu_kernel void @v_extractelement_v16f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) nounwind {
; SI-LABEL: v_extractelement_v16f16_dynamic_sgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -944,7 +944,7 @@ define amdgpu_kernel void @v_extractelement_v16f16_dynamic_sgpr(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
@@ -954,7 +954,4 @@ define amdgpu_kernel void @v_extractelement_v16f16_dynamic_sgpr(ptr addrspace(1)
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll
index e8efe0bfc55420..e5abbd087dc879 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll
@@ -5,7 +5,7 @@
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx2
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @extract_vector_elt_v3f64_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @extract_vector_elt_v3f64_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%ld = load volatile <3 x double>, ptr addrspace(1) %in
%elt = extractelement <3 x double> %ld, i32 2
store volatile double %elt, ptr addrspace(1) %out
@@ -21,7 +21,7 @@ define amdgpu_kernel void @extract_vector_elt_v3f64_2(ptr addrspace(1) %out, ptr
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(ptr addrspace(1) %out, <3 x double> %foo, i32 %elt) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(ptr addrspace(1) %out, <3 x double> %foo, i32 %elt) nounwind {
%dynelt = extractelement <3 x double> %foo, i32 %elt
store volatile double %dynelt, ptr addrspace(1) %out
ret void
@@ -39,10 +39,8 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3f64(ptr addrspace(1) %out, <
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v4f64(ptr addrspace(1) %out, <4 x double> %foo, i32 %elt) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v4f64(ptr addrspace(1) %out, <4 x double> %foo, i32 %elt) nounwind {
%dynelt = extractelement <4 x double> %foo, i32 %elt
store volatile double %dynelt, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index b69852da247445..e35dd2c94883db 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -12,7 +12,7 @@
; GFX9: v_mov_b32_e32 [[VVEC:v[0-9]+]], [[VEC]]
; GFX9: global_store_short_d16_hi v{{[0-9]+}}, [[VVEC]],
; GFX9: buffer_store_short [[VVEC]],
-define amdgpu_kernel void @extract_vector_elt_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
%p0 = extractelement <2 x i16> %vec, i32 0
%p1 = extractelement <2 x i16> %vec, i32 1
@@ -30,7 +30,7 @@ define amdgpu_kernel void @extract_vector_elt_v2i16(ptr addrspace(1) %out, ptr a
; GCN: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
; GCN: buffer_store_short [[VELT1]]
; GCN: ScratchSize: 0
-define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %idx) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %idx) nounwind {
%vec = load <2 x i16>, ptr addrspace(4) %vec.ptr
%elt = extractelement <2 x i16> %vec, i32 %idx
store i16 %elt, ptr addrspace(1) %out, align 2
@@ -48,7 +48,7 @@ define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_sgpr(ptr addrspace(1
; SI: buffer_store_short [[ELT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[ELT]]
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(1) %idx.ptr) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(1) %idx.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %idx.ptr, i64 %tid.ext
@@ -67,7 +67,7 @@ define amdgpu_kernel void @extract_vector_elt_v2i16_dynamic_vgpr(ptr addrspace(1
; GCN: buffer_store_short
; GCN: buffer_store_short
-define amdgpu_kernel void @extract_vector_elt_v3i16(ptr addrspace(1) %out, <3 x i16> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v3i16(ptr addrspace(1) %out, <3 x i16> %foo) nounwind {
%p0 = extractelement <3 x i16> %foo, i32 0
%p1 = extractelement <3 x i16> %foo, i32 2
%out1 = getelementptr i16, ptr addrspace(1) %out, i32 1
@@ -86,7 +86,7 @@ define amdgpu_kernel void @extract_vector_elt_v3i16(ptr addrspace(1) %out, <3 x
; GFX89-DAG: buffer_store_short [[VLOAD0]], off
; GFX89-DAG: v_mov_b32_e32 [[VLOAD1:v[0-9]+]], s[[#LOAD + 3]]
; GFX89-DAG: buffer_store_short [[VLOAD1]], off
-define amdgpu_kernel void @extract_vector_elt_v4i16(ptr addrspace(1) %out, <4 x i16> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v4i16(ptr addrspace(1) %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2
%out1 = getelementptr i16, ptr addrspace(1) %out, i32 10
@@ -116,7 +116,7 @@ define amdgpu_kernel void @extract_vector_elt_v4i16(ptr addrspace(1) %out, <4 x
; GCN-DAG: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 4
; GCN: s_lshr_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s
; GCN: {{buffer|global}}_store_short
-define amdgpu_kernel void @dynamic_extract_vector_elt_v3i16(ptr addrspace(1) %out, [8 x i32], <3 x i16> %foo, i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v3i16(ptr addrspace(1) %out, [8 x i32], <3 x i16> %foo, i32 %idx) nounwind {
%p0 = extractelement <3 x i16> %foo, i32 %idx
%out1 = getelementptr i16, ptr addrspace(1) %out, i32 1
store i16 %p0, ptr addrspace(1) %out
@@ -124,8 +124,8 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v3i16(ptr addrspace(1) %ou
}
; GCN-LABEL: {{^}}v_insertelement_v4i16_dynamic_sgpr:
-define amdgpu_kernel void @v_insertelement_v4i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_insertelement_v4i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -141,7 +141,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_sgpr(ptr addrspace(1) %
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
; GCN-NOT: {{s|buffer|flat|global}}_load_
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
-define amdgpu_kernel void @reduce_load_vector_v8i16_extract_01(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @reduce_load_vector_v8i16_extract_01(ptr addrspace(4) %ptr) nounwind {
%load = load <16 x i16>, ptr addrspace(4) %ptr
%elt0 = extractelement <16 x i16> %load, i32 0
%elt1 = extractelement <16 x i16> %load, i32 1
@@ -156,7 +156,7 @@ define amdgpu_kernel void @reduce_load_vector_v8i16_extract_01(ptr addrspace(4)
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], {{0x1|0x4}}
; GCN-NOT: {{s|buffer|flat|global}}_load_
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
-define amdgpu_kernel void @reduce_load_vector_v8i16_extract_23(ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @reduce_load_vector_v8i16_extract_23(ptr addrspace(4) %ptr) nounwind {
%load = load <16 x i16>, ptr addrspace(4) %ptr
%elt2 = extractelement <16 x i16> %load, i32 2
%elt3 = extractelement <16 x i16> %load, i32 3
@@ -172,8 +172,8 @@ define amdgpu_kernel void @reduce_load_vector_v8i16_extract_23(ptr addrspace(4)
; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:4
; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
-define amdgpu_kernel void @v_extractelement_v8i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v8i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -190,8 +190,8 @@ define amdgpu_kernel void @v_extractelement_v8i16_2(ptr addrspace(1) %out, ptr a
; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:12
; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
-define amdgpu_kernel void @v_extractelement_v8i16_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v8i16_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -203,8 +203,8 @@ define amdgpu_kernel void @v_extractelement_v8i16_6(ptr addrspace(1) %out, ptr a
; GCN-LABEL: {{^}}v_extractelement_v8i16_dynamic_sgpr:
; GCN-COUNT-7: v_cndmask_b32_e32
-define amdgpu_kernel void @v_extractelement_v8i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v8i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -221,8 +221,8 @@ define amdgpu_kernel void @v_extractelement_v8i16_dynamic_sgpr(ptr addrspace(1)
; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:4
; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
-define amdgpu_kernel void @v_extractelement_v16i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v16i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -239,8 +239,8 @@ define amdgpu_kernel void @v_extractelement_v16i16_2(ptr addrspace(1) %out, ptr
; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:12
; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
-define amdgpu_kernel void @v_extractelement_v16i16_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v16i16_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -252,8 +252,8 @@ define amdgpu_kernel void @v_extractelement_v16i16_6(ptr addrspace(1) %out, ptr
; GCN-LABEL: {{^}}v_extractelement_v16i16_dynamic_sgpr:
; GCN-COUNT-15: v_cndmask_b32_e32
-define amdgpu_kernel void @v_extractelement_v16i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_extractelement_v16i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %n) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext
@@ -263,7 +263,4 @@ define amdgpu_kernel void @v_extractelement_v16i16_dynamic_sgpr(ptr addrspace(1)
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll
index 6b6f6ff8e34dab..4d95256929a8c7 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll
@@ -8,7 +8,7 @@
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @extract_vector_elt_select_error(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %val) #0 {
+define amdgpu_kernel void @extract_vector_elt_select_error(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %val) nounwind {
%vec = bitcast i64 %val to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 0
%elt1 = extractelement <2 x i32> %vec, i32 1
@@ -20,7 +20,7 @@ define amdgpu_kernel void @extract_vector_elt_select_error(ptr addrspace(1) %out
}
; GCN-LABEL: {{^}}extract_vector_elt_v2i64:
-define amdgpu_kernel void @extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo) nounwind {
%p0 = extractelement <2 x i64> %foo, i32 0
%p1 = extractelement <2 x i64> %foo, i32 1
%out1 = getelementptr i64, ptr addrspace(1) %out, i32 1
@@ -35,7 +35,7 @@ define amdgpu_kernel void @extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo, i32 %elt) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(ptr addrspace(1) %out, <2 x i64> %foo, i32 %elt) nounwind {
%dynelt = extractelement <2 x i64> %foo, i32 %elt
store volatile i64 %dynelt, ptr addrspace(1) %out
ret void
@@ -49,7 +49,7 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(ptr addrspace(1) %out, <
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(ptr addrspace(1) %out, ptr addrspace(1) %foo, i32 %elt, <2 x i64> %arst) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(ptr addrspace(1) %out, ptr addrspace(1) %foo, i32 %elt, <2 x i64> %arst) nounwind {
%load = load volatile <2 x i64>, ptr addrspace(1) %foo
%or = or <2 x i64> %load, %arst
%dynelt = extractelement <2 x i64> %or, i32 %elt
@@ -66,7 +66,7 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(ptr addrspace(1) %out,
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(ptr addrspace(1) %out, <3 x i64> %foo, i32 %elt) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(ptr addrspace(1) %out, <3 x i64> %foo, i32 %elt) nounwind {
%dynelt = extractelement <3 x i64> %foo, i32 %elt
store volatile i64 %dynelt, ptr addrspace(1) %out
ret void
@@ -84,10 +84,8 @@ define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(ptr addrspace(1) %out, <
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GCN: store_dwordx2 v[{{[0-9:]+}}]
-define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(ptr addrspace(1) %out, <4 x i64> %foo, i32 %elt) #0 {
+define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(ptr addrspace(1) %out, <4 x i64> %foo, i32 %elt) nounwind {
%dynelt = extractelement <4 x i64> %foo, i32 %elt
store volatile i64 %dynelt, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
index 331fe26160d412..91fbd8fe54040a 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
@@ -5,7 +5,7 @@
; GCN: s_load_dword [[LOAD:s[0-9]+]]
; GCN: v_mov_b32_e32 [[V_LOAD:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_byte [[V_LOAD]]
-define amdgpu_kernel void @extract_vector_elt_v1i8(ptr addrspace(1) %out, <1 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v1i8(ptr addrspace(1) %out, <1 x i8> %foo) nounwind {
%p0 = extractelement <1 x i8> %foo, i32 0
store i8 %p0, ptr addrspace(1) %out
ret void
@@ -19,7 +19,7 @@ define amdgpu_kernel void @extract_vector_elt_v1i8(ptr addrspace(1) %out, <1 x i
; GCN-NOT: {{flat|buffer|global}}
; GCN: buffer_store_byte
; GCN: buffer_store_byte
-define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i8> %foo) nounwind {
%p0 = extractelement <2 x i8> %foo, i32 0
%p1 = extractelement <2 x i8> %foo, i32 1
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -35,7 +35,7 @@ define amdgpu_kernel void @extract_vector_elt_v2i8(ptr addrspace(1) %out, <2 x i
; GCN-NOT: {{flat|buffer|global}}
; GCN: buffer_store_byte
; GCN: buffer_store_byte
-define amdgpu_kernel void @extract_vector_elt_v3i8(ptr addrspace(1) %out, <3 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v3i8(ptr addrspace(1) %out, <3 x i8> %foo) nounwind {
%p0 = extractelement <3 x i8> %foo, i32 0
%p1 = extractelement <3 x i8> %foo, i32 2
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -51,7 +51,7 @@ define amdgpu_kernel void @extract_vector_elt_v3i8(ptr addrspace(1) %out, <3 x i
; GCN-NOT: {{flat|buffer|global}}
; GCN: buffer_store_byte
; GCN: buffer_store_byte
-define amdgpu_kernel void @extract_vector_elt_v4i8(ptr addrspace(1) %out, <4 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v4i8(ptr addrspace(1) %out, <4 x i8> %foo) nounwind {
%p0 = extractelement <4 x i8> %foo, i32 0
%p1 = extractelement <4 x i8> %foo, i32 2
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -68,7 +68,7 @@ define amdgpu_kernel void @extract_vector_elt_v4i8(ptr addrspace(1) %out, <4 x i
; GCN-NOT: {{s|flat|buffer|global}}_load
; GCN: buffer_store_byte
; GCN: buffer_store_byte
-define amdgpu_kernel void @extract_vector_elt_v8i8(<8 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v8i8(<8 x i8> %foo) nounwind {
%p0 = extractelement <8 x i8> %foo, i32 0
%p1 = extractelement <8 x i8> %foo, i32 2
store volatile i8 %p1, ptr addrspace(1) null
@@ -84,7 +84,7 @@ define amdgpu_kernel void @extract_vector_elt_v8i8(<8 x i8> %foo) #0 {
; GCN-DAG: v_mov_b32_e32 [[V_ELT2:v[0-9]+]], [[ELT2]]
; GCN: buffer_store_byte [[V_ELT2]]
; GCN: buffer_store_byte [[V_LOAD0]]
-define amdgpu_kernel void @extract_vector_elt_v16i8(ptr addrspace(1) %out, <16 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v16i8(ptr addrspace(1) %out, <16 x i8> %foo) nounwind {
%p0 = extractelement <16 x i8> %foo, i32 0
%p1 = extractelement <16 x i8> %foo, i32 2
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -102,7 +102,7 @@ define amdgpu_kernel void @extract_vector_elt_v16i8(ptr addrspace(1) %out, <16 x
; GCN-DAG: v_mov_b32_e32 [[V_ELT2:v[0-9]+]], [[ELT2]]
; GCN: buffer_store_byte [[V_ELT2]]
; GCN: buffer_store_byte [[V_LOAD0]]
-define amdgpu_kernel void @extract_vector_elt_v32i8(<32 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v32i8(<32 x i8> %foo) nounwind {
%p0 = extractelement <32 x i8> %foo, i32 0
%p1 = extractelement <32 x i8> %foo, i32 2
store volatile i8 %p1, ptr addrspace(1) null
@@ -118,7 +118,7 @@ define amdgpu_kernel void @extract_vector_elt_v32i8(<32 x i8> %foo) #0 {
; GCN-DAG: v_mov_b32_e32 [[V_ELT2:v[0-9]+]], [[ELT2]]
; GCN: buffer_store_byte [[V_ELT2]]
; GCN: buffer_store_byte [[V_LOAD0]]
-define amdgpu_kernel void @extract_vector_elt_v64i8(ptr addrspace(1) %out, <64 x i8> %foo) #0 {
+define amdgpu_kernel void @extract_vector_elt_v64i8(ptr addrspace(1) %out, <64 x i8> %foo) nounwind {
%p0 = extractelement <64 x i8> %foo, i32 0
%p1 = extractelement <64 x i8> %foo, i32 2
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -140,7 +140,7 @@ define amdgpu_kernel void @extract_vector_elt_v64i8(ptr addrspace(1) %out, <64 x
; VI-DAG: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 3
; VI: v_lshrrev_b16_e32 [[ELT:v[0-9]+]], [[SCALED_IDX]], [[V_LOAD]]
; VI: buffer_store_byte [[ELT]]
-define amdgpu_kernel void @dynamic_extract_vector_elt_v2i8(ptr addrspace(1) %out, [8 x i32], <2 x i8> %foo, [8 x i32], i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v2i8(ptr addrspace(1) %out, [8 x i32], <2 x i8> %foo, [8 x i32], i32 %idx) nounwind {
%elt = extractelement <2 x i8> %foo, i32 %idx
store volatile i8 %elt, ptr addrspace(1) %out
ret void
@@ -154,7 +154,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v2i8(ptr addrspace(1) %out
; VI: s_lshr_b32 [[ELT:s[0-9]+]], [[LOAD]], [[SCALED_IDX]]
; VI: v_mov_b32_e32 [[V_ELT:v[0-9]+]], [[ELT]]
; VI: buffer_store_byte [[V_ELT]]
-define amdgpu_kernel void @dynamic_extract_vector_elt_v3i8(ptr addrspace(1) %out, [8 x i32], <3 x i8> %foo, [8 x i32], i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v3i8(ptr addrspace(1) %out, [8 x i32], <3 x i8> %foo, [8 x i32], i32 %idx) nounwind {
%p0 = extractelement <3 x i8> %foo, i32 %idx
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
store volatile i8 %p0, ptr addrspace(1) %out
@@ -170,7 +170,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v3i8(ptr addrspace(1) %out
; VI: v_mov_b32_e32 [[V_EXTRACT:v[0-9]+]], [[EXTRACT]]
; VI: buffer_store_byte [[V_EXTRACT]]
-define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %idx) nounwind {
%vec = load <4 x i8>, ptr addrspace(4) %vec.ptr
%p0 = extractelement <4 x i8> %vec, i32 %idx
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -186,7 +186,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(ptr addrspace(1) %out
; VI: s_lshr_b64 s[[[EXTRACT_LO:[0-9]+]]:{{[0-9]+\]}}, [[VEC8]], [[SCALED_IDX]]
; VI: v_mov_b32_e32 [[V_EXTRACT:v[0-9]+]], s[[EXTRACT_LO]]
; VI: buffer_store_byte [[V_EXTRACT]]
-define amdgpu_kernel void @dynamic_extract_vector_elt_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %idx) #0 {
+define amdgpu_kernel void @dynamic_extract_vector_elt_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %idx) nounwind {
%vec = load <8 x i8>, ptr addrspace(4) %vec.ptr
%p0 = extractelement <8 x i8> %vec, i32 %idx
%out1 = getelementptr i8, ptr addrspace(1) %out, i32 1
@@ -201,7 +201,7 @@ define amdgpu_kernel void @dynamic_extract_vector_elt_v8i8(ptr addrspace(1) %out
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 24
-define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0123() #0 {
+define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0123() nounwind {
%load = load <8 x i8>, ptr addrspace(4) null
%elt0 = extractelement <8 x i8> %load, i32 0
%elt1 = extractelement <8 x i8> %load, i32 1
@@ -220,7 +220,7 @@ define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0123() #0 {
; GCN-NOT: {{s|buffer|flat|global}}_load_
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
-define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0145() #0 {
+define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0145() nounwind {
%load = load <8 x i8>, ptr addrspace(4) null
%elt0 = extractelement <8 x i8> %load, i32 0
%elt1 = extractelement <8 x i8> %load, i32 1
@@ -239,7 +239,7 @@ define amdgpu_kernel void @reduce_load_vector_v8i8_extract_0145() #0 {
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}}
; GCN-NOT: {{s|buffer|flat|global}}_load_
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
-define amdgpu_kernel void @reduce_load_vector_v8i8_extract_45() #0 {
+define amdgpu_kernel void @reduce_load_vector_v8i8_extract_45() nounwind {
%load = load <8 x i8>, ptr addrspace(4) null
%elt4 = extractelement <8 x i8> %load, i32 4
%elt5 = extractelement <8 x i8> %load, i32 5
@@ -255,7 +255,7 @@ define amdgpu_kernel void @reduce_load_vector_v8i8_extract_45() #0 {
; GCN-NOT: {{s|buffer|flat|global}}_load_
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8
-define amdgpu_kernel void @reduce_load_vector_v16i8_extract_0145() #0 {
+define amdgpu_kernel void @reduce_load_vector_v16i8_extract_0145() nounwind {
%load = load <16 x i8>, ptr addrspace(4) null
%elt0 = extractelement <16 x i8> %load, i32 0
%elt1 = extractelement <16 x i8> %load, i32 1
@@ -267,5 +267,3 @@ define amdgpu_kernel void @reduce_load_vector_v16i8_extract_0145() #0 {
store volatile i8 %elt5, ptr addrspace(1) undef, align 1
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index f34824cd6cefe1..04077a7c7bba28 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -290,7 +290,7 @@ define amdgpu_kernel void @fabs_fold_f16(ptr addrspace(1) %out, half %in0, half
ret void
}
-define amdgpu_kernel void @v_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fabs_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
@@ -351,7 +351,7 @@ define amdgpu_kernel void @v_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @fabs_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @fabs_free_v2f16(ptr addrspace(1) %out, i32 %in) nounwind {
; CI-LABEL: fabs_free_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -408,7 +408,7 @@ define amdgpu_kernel void @fabs_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
; FIXME: Should do fabs after conversion to avoid converting multiple
; times in this particular case.
-define amdgpu_kernel void @v_fabs_fold_self_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fabs_fold_self_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fabs_fold_self_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -490,7 +490,7 @@ define amdgpu_kernel void @v_fabs_fold_self_v2f16(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_fabs_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %other.val) #0 {
+define amdgpu_kernel void @v_fabs_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %other.val) nounwind {
; CI-LABEL: v_fabs_fold_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -579,7 +579,7 @@ define amdgpu_kernel void @v_fabs_fold_v2f16(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @v_extract_fabs_fold_v2f16(ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_extract_fabs_fold_v2f16(ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_extract_fabs_fold_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -670,7 +670,7 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2f16(ptr addrspace(1) %in) #0 {
ret void
}
-define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_extract_fabs_no_fold_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -747,11 +747,8 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0
ret void
}
-declare half @llvm.fabs.f16(half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
index b36c6e707ebabc..8a7edc61772e56 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@@ -9,7 +9,7 @@
; exists in the original program.
; (fadd (fma x, y, (fmul u, v), z) -> (fma x, y (fma u, v, z))
-define amdgpu_kernel void @fast_add_fmuladd_fmul() #0 {
+define amdgpu_kernel void @fast_add_fmuladd_fmul() nounwind {
; GCN-FLUSH-LABEL: fast_add_fmuladd_fmul:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -83,7 +83,7 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul() #0 {
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fmul() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fmul() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fmul:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -157,7 +157,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul() #0 {
ret void
}
-define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul() #0 {
+define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul() nounwind {
; GCN-FLUSH-LABEL: fast_add_fmuladd_fmul_multi_use_mul:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -240,7 +240,7 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul() #0 {
ret void
}
-define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul_commute() #0 {
+define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul_commute() nounwind {
; GCN-FLUSH-LABEL: fast_add_fmuladd_fmul_multi_use_mul_commute:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -323,7 +323,7 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul_commute() #0 {
ret void
}
-define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd() #0 {
+define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd() nounwind {
; GCN-FLUSH-LABEL: fast_add_fmuladd_fmul_multi_use_fmuladd:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -406,7 +406,7 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd() #0 {
ret void
}
-define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd_commute() #0 {
+define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd_commute() nounwind {
; GCN-FLUSH-LABEL: fast_add_fmuladd_fmul_multi_use_fmuladd_commute:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -489,7 +489,7 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd_commute() #0
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_mul() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_mul() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fmul_multi_use_mul:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -572,7 +572,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_mul() #0 {
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -655,7 +655,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs() #0 {
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -738,7 +738,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs() #0 {
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -828,7 +828,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs() #
ret void
}
-define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() #0 {
+define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() nounwind {
; GCN-FLUSH-LABEL: fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs:
; GCN-FLUSH: ; %bb.0:
; GCN-FLUSH-NEXT: s_mov_b32 s3, 0xf000
@@ -918,10 +918,7 @@ define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() #
ret void
}
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
index cdc6b5a48d0a69..66f093a2e226d4 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -932,7 +932,4 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fadd.ll b/llvm/test/CodeGen/AMDGPU/fadd.ll
index e31f875785121d..b7a3389ea60bdc 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.ll
@@ -5,7 +5,7 @@
; FUNC-LABEL: {{^}}fadd_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
; SI: v_add_f32
-define amdgpu_kernel void @fadd_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fadd_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%add = fadd float %a, %b
store float %add, ptr addrspace(1) %out, align 4
ret void
@@ -16,7 +16,7 @@ define amdgpu_kernel void @fadd_f32(ptr addrspace(1) %out, float %a, float %b) #
; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; SI: v_add_f32
; SI: v_add_f32
-define amdgpu_kernel void @fadd_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind {
%add = fadd <2 x float> %a, %b
store <2 x float> %add, ptr addrspace(1) %out, align 8
ret void
@@ -31,7 +31,7 @@ define amdgpu_kernel void @fadd_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
-define amdgpu_kernel void @fadd_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fadd_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
%a = load <4 x float>, ptr addrspace(1) %in, align 16
%b = load <4 x float>, ptr addrspace(1) %b_ptr, align 16
@@ -57,7 +57,7 @@ define amdgpu_kernel void @fadd_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
-define amdgpu_kernel void @fadd_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind {
%add = fadd <8 x float> %a, %b
store <8 x float> %add, ptr addrspace(1) %out, align 32
ret void
@@ -65,11 +65,8 @@ define amdgpu_kernel void @fadd_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8
; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
; SI-NOT: v_add_f32
-define amdgpu_kernel void @fadd_0_nsz_attr_f32(ptr addrspace(1) %out, float %a) #1 {
+define amdgpu_kernel void @fadd_0_nsz_attr_f32(ptr addrspace(1) %out, float %a) nounwind "no-signed-zeros-fp-math"="true" {
%add = fadd nsz float %a, 0.0
store float %add, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fadd64.ll b/llvm/test/CodeGen/AMDGPU/fadd64.ll
index 1d3a16e942cf04..3efce63afe1fc4 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd64.ll
@@ -46,7 +46,4 @@ define amdgpu_kernel void @s_fadd_v2f64(ptr addrspace(1) %out, <2 x double> %r0,
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
index 8ff78aaccf5a3e..4888d3c3db2d76 100644
--- a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
@@ -14,6 +14,4 @@ define amdgpu_cs void @atomic_fadd(<4 x i32> inreg %arg0) {
ret void
}
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index e1981972f58d1b..34a29303b19c32 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -223,7 +223,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr add
; GCN-NOT: v_mul
; GCN-NOT: v_max
; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
-define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
%load = load half, ptr addrspace(1) %gep, align 2
@@ -271,7 +271,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addr
; GCN-NOT: v_max
; GCN-NOT: v_mul
; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
-define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
%load = load float, ptr addrspace(1) %gep, align 4
@@ -471,7 +471,7 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_iee
ret void
}
-; define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode(ptr addrspace(1) %arg) #1 {
+; define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode(ptr addrspace(1) %arg) "no-nans-fp-math"="true" {
; %id = tail call i32 @llvm.amdgcn.workitem.id.x()
; %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
; %load = load float, ptr addrspace(1) %gep, align 4
@@ -621,7 +621,7 @@ entry:
; GFX9-DENORM-NOT: v_max
; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-FLUSH: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) "no-nans-fp-math"="true" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
%v = load float, ptr addrspace(1) %gep, align 4
@@ -636,7 +636,7 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrsp
; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
; GCN-NOT: v_mul_
; GCN-NOT: v_max_
-define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) "no-nans-fp-math"="true" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
%v = load double, ptr addrspace(1) %gep, align 8
@@ -650,7 +650,7 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrsp
; GCN: {{flat|global}}_load_ushort [[V1:v[0-9]+]],
; GCN: v_max_f16_e32 [[V2:v[0-9]+]], [[V1]], [[V1]]
; GCN: {{flat|global}}_store_short v{{.+}}, [[V2]]
-define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) "no-nans-fp-math"="true" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
%v = load half, ptr addrspace(1) %gep, align 2
@@ -724,7 +724,7 @@ define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %
; GCN: v_min_f32_e32 v0, v0, v1
; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GCN-NEXT: ; return
-define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
+define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) "no-nans-fp-math"="true" {
%v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
ret float %canonicalized
@@ -859,33 +859,29 @@ define float @v_test_canonicalize_amdgcn_exp2(float %a) {
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; GCN: .amd_amdgpu_isa
-declare float @llvm.canonicalize.f32(float) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare float @llvm.amdgcn.fmul.legacy(float, float) #0
-declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) #0
-declare double @llvm.canonicalize.f64(double) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.sqrt.f32(float) #0
-declare float @llvm.ceil.f32(float) #0
-declare float @llvm.floor.f32(float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.sin.f32(float) #0
-declare float @llvm.cos.f32(float) #0
-declare half @llvm.sin.f16(half) #0
-declare half @llvm.cos.f16(half) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare double @llvm.maxnum.f64(double, double) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-declare float @llvm.amdgcn.frexp.mant.f32(float) #0
-declare float @llvm.amdgcn.log.f32(float) #0
-declare float @llvm.amdgcn.exp2.f32(float) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { "no-nans-fp-math"="true" }
-attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
+declare float @llvm.canonicalize.f32(float) nounwind readnone
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
+declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) nounwind readnone
+declare double @llvm.canonicalize.f64(double) nounwind readnone
+declare half @llvm.canonicalize.f16(half) nounwind readnone
+declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.sin.f32(float) nounwind readnone
+declare float @llvm.cos.f32(float) nounwind readnone
+declare half @llvm.sin.f16(half) nounwind readnone
+declare half @llvm.cos.f16(half) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare float @llvm.amdgcn.cubeid(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
+declare float @llvm.amdgcn.log.f32(float) nounwind readnone
+declare float @llvm.amdgcn.exp2.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 581b7b4cff9ed0..912d1dac5ed53d 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -4,21 +4,21 @@
; RUN: llc -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
-declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
-declare <6 x half> @llvm.canonicalize.v6f16(<6 x half>) #0
-declare <8 x half> @llvm.canonicalize.v8f16(<8 x half>) #0
-declare <12 x half> @llvm.canonicalize.v12f16(<12 x half>) #0
-declare <16 x half> @llvm.canonicalize.v16f16(<16 x half>) #0
-declare <32 x half> @llvm.canonicalize.v32f16(<32 x half>) #0
-declare <64 x half> @llvm.canonicalize.v64f16(<64 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(1) %out) #1 {
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare half @llvm.canonicalize.f16(half) nounwind readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone
+declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) nounwind readnone
+declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) nounwind readnone
+declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) nounwind readnone
+declare <6 x half> @llvm.canonicalize.v6f16(<6 x half>) nounwind readnone
+declare <8 x half> @llvm.canonicalize.v8f16(<8 x half>) nounwind readnone
+declare <12 x half> @llvm.canonicalize.v12f16(<12 x half>) nounwind readnone
+declare <16 x half> @llvm.canonicalize.v16f16(<16 x half>) nounwind readnone
+declare <32 x half> @llvm.canonicalize.v32f16(<32 x half>) nounwind readnone
+declare <64 x half> @llvm.canonicalize.v64f16(<64 x half>) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+
+define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_undef_value_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -61,7 +61,7 @@ define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_var_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -116,7 +116,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_f16(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i16 zeroext %val.arg) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i16 zeroext %val.arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: s_test_canonicalize_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
@@ -168,7 +168,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i1
ret void
}
-define half @s_test_canonicalize_arg(half %x) #1 {
+define half @s_test_canonicalize_arg(half %x) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: s_test_canonicalize_arg:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -197,7 +197,7 @@ define half @s_test_canonicalize_arg(half %x) #1 {
ret half %canonicalized
}
-define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1 {
+define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_build_vector_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -236,7 +236,7 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
ret <2 x half> %canonicalized
}
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fabs_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -292,7 +292,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fneg_fabs_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -349,7 +349,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fneg_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -405,7 +405,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_no_denormals_canonicalize_fneg_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -461,7 +461,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(ptr add
ret void
}
-define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -518,7 +518,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(pt
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p0_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_p0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -561,7 +561,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n0_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_n0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -605,7 +605,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p1_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_p1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -649,7 +649,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n1_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_n1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -693,7 +693,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_literal_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_literal_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -737,7 +737,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f16(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_default_denormals_fold_canonicalize_denormal0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -781,7 +781,7 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f1
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_denormals_fold_canonicalize_denormal0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -825,7 +825,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(ptr ad
ret void
}
-define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_default_denormals_fold_canonicalize_denormal1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -869,7 +869,7 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f1
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_denormals_fold_canonicalize_denormal1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -913,7 +913,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(ptr ad
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -957,7 +957,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_value_neg1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1001,7 +1001,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_value_neg2_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1045,7 +1045,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan0_value_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1089,7 +1089,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan1_value_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1133,7 +1133,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan2_value_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1177,7 +1177,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan3_value_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1221,7 +1221,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_var_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1295,7 +1295,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fabs_var_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1373,7 +1373,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fneg_fabs_var_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1453,7 +1453,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(ptr addrspace
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_fneg_var_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1529,7 +1529,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @s_test_canonicalize_var_v2f16(ptr addrspace(1) %out, i32 zeroext %val.arg) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_v2f16(ptr addrspace(1) %out, i32 zeroext %val.arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: s_test_canonicalize_var_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
@@ -1590,7 +1590,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_v2f16(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_p0_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1633,7 +1633,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_n0_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1677,7 +1677,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_p1_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1721,7 +1721,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_n1_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1765,7 +1765,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_literal_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1809,7 +1809,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_no_denormals_fold_canonicalize_denormal0_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1853,7 +1853,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(p
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_denormals_fold_canonicalize_denormal0_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1897,7 +1897,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(ptr
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_no_denormals_fold_canonicalize_denormal1_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1941,7 +1941,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(p
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_denormals_fold_canonicalize_denormal1_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1985,7 +1985,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(ptr
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2029,7 +2029,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_value_neg1_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2073,7 +2073,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(ptr addr
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_qnan_value_neg2_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2117,7 +2117,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(ptr addr
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan0_value_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2161,7 +2161,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(ptr addrspac
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan1_value_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2205,7 +2205,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(ptr addrspac
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan2_value_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2249,7 +2249,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(ptr addrspac
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: test_fold_canonicalize_snan3_value_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2293,7 +2293,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(ptr addrspac
ret void
}
-define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 {
+define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v3f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2331,7 +2331,7 @@ define <3 x half> @v_test_canonicalize_var_v3f16(<3 x half> %val) #1 {
ret <3 x half> %canonicalized
}
-define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 {
+define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2373,7 +2373,7 @@ define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 {
ret <4 x half> %canonicalized
}
-define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: s_test_canonicalize_undef_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2416,7 +2416,7 @@ define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(ptr addrspace(1) %out
ret void
}
-define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
+define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_reg_undef_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2450,7 +2450,7 @@ define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
+define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_undef_reg_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2483,7 +2483,7 @@ define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_undef_lo_imm_hi_v2f16() #1 {
+define <2 x half> @v_test_canonicalize_undef_lo_imm_hi_v2f16() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_undef_lo_imm_hi_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2513,7 +2513,7 @@ define <2 x half> @v_test_canonicalize_undef_lo_imm_hi_v2f16() #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_imm_lo_undef_hi_v2f16() #1 {
+define <2 x half> @v_test_canonicalize_imm_lo_undef_hi_v2f16() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_imm_lo_undef_hi_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2543,7 +2543,7 @@ define <2 x half> @v_test_canonicalize_imm_lo_undef_hi_v2f16() #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_undef_lo_k_hi_v2f16() #1 {
+define <2 x half> @v_test_canonicalize_undef_lo_k_hi_v2f16() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_undef_lo_k_hi_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2573,7 +2573,7 @@ define <2 x half> @v_test_canonicalize_undef_lo_k_hi_v2f16() #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_k_lo_undef_hi_v2f16() #1 {
+define <2 x half> @v_test_canonicalize_k_lo_undef_hi_v2f16() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_k_lo_undef_hi_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2603,7 +2603,7 @@ define <2 x half> @v_test_canonicalize_k_lo_undef_hi_v2f16() #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 {
+define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_reg_k_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2639,7 +2639,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 {
ret <2 x half> %canonicalized
}
-define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 {
+define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_k_reg_v2f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2675,7 +2675,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 {
ret <2 x half> %canonicalized
}
-define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: s_test_canonicalize_undef_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2723,7 +2723,7 @@ define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(ptr addrspace(1) %out
ret void
}
-define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1 {
+define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_reg_undef_undef_undef_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2763,7 +2763,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1
ret <4 x half> %canonicalized
}
-define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, half %val1) #1 {
+define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, half %val1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_reg_reg_undef_undef_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2807,7 +2807,7 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
ret <4 x half> %canonicalized
}
-define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half %val1, half %val2) #1 {
+define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half %val1, half %val2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_reg_undef_reg_reg_v4f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2856,7 +2856,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
ret <4 x half> %canonicalized
}
-define <6 x half> @v_test_canonicalize_var_v6f16(<6 x half> %val) #1 {
+define <6 x half> @v_test_canonicalize_var_v6f16(<6 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v6f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2907,7 +2907,7 @@ define <6 x half> @v_test_canonicalize_var_v6f16(<6 x half> %val) #1 {
ret <6 x half> %canonicalized
}
-define <8 x half> @v_test_canonicalize_var_v8f16(<8 x half> %val) #1 {
+define <8 x half> @v_test_canonicalize_var_v8f16(<8 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v8f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2967,7 +2967,7 @@ define <8 x half> @v_test_canonicalize_var_v8f16(<8 x half> %val) #1 {
ret <8 x half> %canonicalized
}
-define <12 x half> @v_test_canonicalize_var_v12f16(<12 x half> %val) #1 {
+define <12 x half> @v_test_canonicalize_var_v12f16(<12 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v12f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3045,7 +3045,7 @@ define <12 x half> @v_test_canonicalize_var_v12f16(<12 x half> %val) #1 {
ret <12 x half> %canonicalized
}
-define <16 x half> @v_test_canonicalize_var_v16f16(<16 x half> %val) #1 {
+define <16 x half> @v_test_canonicalize_var_v16f16(<16 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v16f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3141,7 +3141,7 @@ define <16 x half> @v_test_canonicalize_var_v16f16(<16 x half> %val) #1 {
ret <16 x half> %canonicalized
}
-define <32 x half> @v_test_canonicalize_var_v32f16(<32 x half> %val) #1 {
+define <32 x half> @v_test_canonicalize_var_v32f16(<32 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v32f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3311,7 +3311,7 @@ define <32 x half> @v_test_canonicalize_var_v32f16(<32 x half> %val) #1 {
ret <32 x half> %canonicalized
}
-define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 {
+define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; VI-LABEL: v_test_canonicalize_var_v64f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3883,8 +3883,3 @@ define <64 x half> @v_test_canonicalize_var_v64f16(<64 x half> %val) #1 {
%canonicalized = call <64 x half> @llvm.canonicalize.v64f16(<64 x half> %val)
ret <64 x half> %canonicalized
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #3 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
index d53c0411ad88c1..5317ac5d999232 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -5,22 +5,22 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.canonicalize.f32(float) #0
-declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0
-declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0
-declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.canonicalize.f64(double) #0
-declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
-declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0
-declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.canonicalize.f32(float) nounwind readnone
+declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) nounwind readnone
+declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) nounwind readnone
+declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) nounwind readnone
+declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.canonicalize.f64(double) nounwind readnone
+declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) nounwind readnone
+declare half @llvm.canonicalize.f16(half) nounwind readnone
+declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+
+define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_var_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -75,7 +75,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, float %val) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, float %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: s_test_canonicalize_var_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -136,7 +136,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, fl
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fabs_var_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -192,7 +192,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -249,7 +249,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fneg_var_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -305,7 +305,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_undef_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -348,7 +348,7 @@ define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_p0_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -391,7 +391,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_n0_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -437,7 +437,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_p1_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -481,7 +481,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_n1_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -525,7 +525,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_literal_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -569,7 +569,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -612,7 +612,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) #5 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -660,7 +660,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dyn
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) #6 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="dynamic,ieee" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -708,7 +708,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dyn
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) #7 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="ieee,dynamic" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dyn
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -800,7 +800,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr ad
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -846,7 +846,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -890,7 +890,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr ad
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -934,7 +934,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -978,7 +978,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1022,7 +1022,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan0_value_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1066,7 +1066,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan1_value_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1110,7 +1110,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan2_value_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1154,7 +1154,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan3_value_f32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1198,7 +1198,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_var_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1253,7 +1253,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1
ret void
}
-define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, double %val) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, double %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: s_test_canonicalize_var_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1309,7 +1309,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, do
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fabs_var_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1365,7 +1365,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1422,7 +1422,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_fneg_var_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1478,7 +1478,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_p0_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1527,7 +1527,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_n0_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1574,7 +1574,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_p1_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1619,7 +1619,7 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_n1_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1664,7 +1664,7 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_literal_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1709,7 +1709,7 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1758,7 +1758,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1806,7 +1806,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr ad
ret void
}
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1853,7 +1853,7 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr
ret void
}
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1901,7 +1901,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr ad
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -1991,7 +1991,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2036,7 +2036,7 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan0_value_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2081,7 +2081,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan1_value_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2126,7 +2126,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan2_value_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2171,7 +2171,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: test_fold_canonicalize_snan3_value_f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2216,7 +2216,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(
ret void
}
-define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX6-LABEL: test_canonicalize_value_f64_flush:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2296,7 +2296,7 @@ define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %a
ret void
}
-define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX6-LABEL: test_canonicalize_value_f32_flush:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2376,7 +2376,7 @@ define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %a
ret void
}
-define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX6-LABEL: test_canonicalize_value_f16_flush:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2458,7 +2458,7 @@ define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %a
}
-define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX6-LABEL: test_canonicalize_value_v2f16_flush:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2547,7 +2547,7 @@ define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX6-LABEL: test_canonicalize_value_f64_denorm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2627,7 +2627,7 @@ define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX6-LABEL: test_canonicalize_value_f32_denorm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2708,7 +2708,7 @@ define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %
}
; FIXME: Conversion to float should count as the canonicalize pre-gfx8
-define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX6-LABEL: test_canonicalize_value_f16_denorm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2791,7 +2791,7 @@ define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %
-define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "denormal-fp-math"="ieee,ieee" {
; GFX6-LABEL: test_canonicalize_value_v2f16_denorm:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2879,7 +2879,7 @@ define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_test_canonicalize_var_v2f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -2965,7 +2965,7 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out)
}
-define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
+define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v2f32_flush:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3000,7 +3000,7 @@ define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
}
-define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
+define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v3f32_flush:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3039,7 +3039,7 @@ define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
}
-define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
+define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v4f32_flush:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3080,7 +3080,7 @@ define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
}
-define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
+define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v8f32_flush:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3132,7 +3132,7 @@ define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
ret <8 x float> %canon
}
-define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 {
+define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v2f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3168,7 +3168,7 @@ define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 {
ret <2 x double> %canon
}
-define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 {
+define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v3f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3208,7 +3208,7 @@ define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 {
ret <3 x double> %canon
}
-define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
+define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX678-LABEL: v_test_canonicalize_v4f64:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3251,12 +3251,3 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
%canon = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %arg)
ret <4 x double> %canon
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #5 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #6 = { nounwind "denormal-fp-math-f32"="dynamic,ieee" }
-attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,dynamic" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
index 7d8f43bbe16b73..26b3b65ad24ddc 100644
--- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -2803,7 +2803,4 @@ entry:
ret void
}
-declare half @llvm.fabs.f16(half) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fconst64.ll b/llvm/test/CodeGen/AMDGPU/fconst64.ll
index ab5a38949295fa..c82039749519ef 100644
--- a/llvm/test/CodeGen/AMDGPU/fconst64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fconst64.ll
@@ -14,7 +14,4 @@ define amdgpu_kernel void @fconst_f64(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index b8936911f05762..14dd1828db717e 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -4,13 +4,13 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope --check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11 %s
-declare half @llvm.copysign.f16(half, half) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare double @llvm.copysign.f64(double, double) #0
-declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>) #0
-declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>) #0
-declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare half @llvm.copysign.f16(half, half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.copysign.f32(float, float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.copysign.f64(double, double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
define amdgpu_kernel void @s_copysign_f16(ptr addrspace(1) %arg_out, half %mag, half %sign) {
; SI-LABEL: s_copysign_f16:
@@ -2113,5 +2113,3 @@ define amdgpu_kernel void @s_copysign_v4f16(ptr addrspace(1) %arg_out, <4 x half
store <4 x half> %out, ptr addrspace(1) %arg_out
ret void
}
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
index f48961c905f58f..6c851e5fffa2d8 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -1145,10 +1145,8 @@ define amdgpu_kernel void @s_test_copysign_f32_fpext_bf16(ptr addrspace(1) %out,
ret void
}
-declare float @llvm.copysign.f32(float, float) #0
-declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) #0
-declare <3 x float> @llvm.copysign.v3f32(<3 x float>, <3 x float>) #0
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) #0
-declare <5 x float> @llvm.copysign.v5f32(<5 x float>, <5 x float>) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.copysign.f32(float, float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.copysign.v3f32(<3 x float>, <3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <5 x float> @llvm.copysign.v5f32(<5 x float>, <5 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
index b5fa3fd9eccc13..919408c7d3f5a9 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=SIVI,VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
-declare double @llvm.copysign.f64(double, double) #0
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
-declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) #0
-declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) #0
+declare double @llvm.copysign.f64(double, double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
define amdgpu_kernel void @s_test_copysign_f64(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], double %sign) {
; SI-LABEL: s_test_copysign_f64:
@@ -1094,5 +1094,3 @@ define <4 x double> @v_test_copysign_v4f64(ptr addrspace(1) %out, <4 x double> %
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
ret <4 x double> %result
}
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
index 2d6ae31f8e585a..85d69d4f694697 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll
@@ -4,7 +4,7 @@
; Make sure nofpexcept flags are emitted when lowering a
; non-constrained fdiv.
-define float @fdiv_f32(float %a, float %b) #0 {
+define float @fdiv_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: name: fdiv_f32
; GCN: bb.0.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -35,7 +35,7 @@ entry:
ret float %fdiv
}
-define float @fdiv_nnan_f32(float %a, float %b) #0 {
+define float @fdiv_nnan_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: name: fdiv_nnan_f32
; GCN: bb.0.entry:
; GCN-NEXT: liveins: $vgpr0, $vgpr1
@@ -65,5 +65,3 @@ entry:
%fdiv = fdiv nnan float %a, %b
ret float %fdiv
}
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
index b14b6421f56b4e..30d20a9c8921f2 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -130,7 +130,7 @@ define amdgpu_kernel void @v_fdiv_f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -144,7 +144,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rcp_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rcp_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rcp_f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -238,7 +238,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rcp_f16_abs(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_abs(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rcp_f16_abs:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -335,7 +335,7 @@ entry:
; We could not do 1/b -> rcp_f32(b) under !fpmath < 1ulp.
-define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: reciprocal_f16_rounded:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -429,7 +429,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rcp_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rcp_f16_afn:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -510,7 +510,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rcp_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rcp_f16_neg:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -604,7 +604,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rsq_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rsq_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rsq_f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -702,7 +702,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rsq_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rsq_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rsq_f16_neg:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -805,7 +805,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rsq_f16_multi_use(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rsq_f16_multi_use(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rsq_f16_multi_use:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -914,7 +914,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rsq_f16_missing_contract0(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rsq_f16_missing_contract0(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rsq_f16_missing_contract0:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1017,7 +1017,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_rsq_f16_missing_contract1:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1120,7 +1120,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_neg_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_neg_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_neg_rsq_f16_missing_contract1:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1223,7 +1223,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_fdiv_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @v_fdiv_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
; SI-LABEL: v_fdiv_f16_afn:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1334,7 +1334,7 @@ entry:
ret void
}
-define amdgpu_kernel void @v_fdiv_f16_unsafe(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) #2 {
+define amdgpu_kernel void @v_fdiv_f16_unsafe(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind "unsafe-fp-math"="true" {
; SI-LABEL: v_fdiv_f16_unsafe:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1445,7 +1445,7 @@ entry:
ret void
}
-define amdgpu_kernel void @div_afn_2_x_pat_f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @div_afn_2_x_pat_f16(ptr addrspace(1) %out) nounwind {
; SI-LABEL: div_afn_2_x_pat_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1512,7 +1512,7 @@ define amdgpu_kernel void @div_afn_2_x_pat_f16(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @div_afn_k_x_pat_f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @div_afn_k_x_pat_f16(ptr addrspace(1) %out) nounwind {
; SI-LABEL: div_afn_k_x_pat_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1579,7 +1579,7 @@ define amdgpu_kernel void @div_afn_k_x_pat_f16(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(ptr addrspace(1) %out) nounwind {
; SI-LABEL: div_afn_neg_k_x_pat_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2145,11 +2145,7 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) {
ret <2 x half> %fdiv
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare half @llvm.sqrt.f16(half) #2
-declare half @llvm.fabs.f16(half) #2
-declare <2 x half> @llvm.sqrt.v2f16(<2 x half>) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind "unsafe-fp-math"="true"
+declare half @llvm.sqrt.f16(half) nounwind "unsafe-fp-math"="true"
+declare half @llvm.fabs.f16(half) nounwind "unsafe-fp-math"="true"
+declare <2 x half> @llvm.sqrt.v2f16(<2 x half>) nounwind "unsafe-fp-math"="true"
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
index dd31f210b7d89d..edbad18788698e 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -29,7 +29,7 @@
; GCN: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]]
; GCN: buffer_store_dwordx2 [[RESULT]]
; GCN: s_endpgm
-define amdgpu_kernel void @fdiv_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fdiv_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%gep.1 = getelementptr double, ptr addrspace(1) %in, i32 1
%num = load volatile double, ptr addrspace(1) %in
%den = load volatile double, ptr addrspace(1) %gep.1
@@ -48,7 +48,7 @@ define amdgpu_kernel void @fdiv_f64(ptr addrspace(1) %out, ptr addrspace(1) %in)
; GCN: v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1]
; GCN: v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7]
; GCN: s_setpc_b64
-define double @v_fdiv_f64_afn(double %x, double %y) #0 {
+define double @v_fdiv_f64_afn(double %x, double %y) nounwind {
%result = fdiv afn double %x, %y
ret double %result
}
@@ -62,13 +62,13 @@ define double @v_fdiv_f64_afn(double %x, double %y) #0 {
; GCN: v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
; GCN: v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
; GCN: s_setpc_b64
-define double @v_rcp_f64_afn(double %x) #0 {
+define double @v_rcp_f64_afn(double %x) nounwind {
%result = fdiv afn double 1.0, %x
ret double %result
}
; GCN-LABEL: {{^}}fdiv_f64_s_v:
-define amdgpu_kernel void @fdiv_f64_s_v(ptr addrspace(1) %out, ptr addrspace(1) %in, double %num) #0 {
+define amdgpu_kernel void @fdiv_f64_s_v(ptr addrspace(1) %out, ptr addrspace(1) %in, double %num) nounwind {
%den = load double, ptr addrspace(1) %in
%result = fdiv double %num, %den
store double %result, ptr addrspace(1) %out
@@ -76,7 +76,7 @@ define amdgpu_kernel void @fdiv_f64_s_v(ptr addrspace(1) %out, ptr addrspace(1)
}
; GCN-LABEL: {{^}}fdiv_f64_v_s:
-define amdgpu_kernel void @fdiv_f64_v_s(ptr addrspace(1) %out, ptr addrspace(1) %in, double %den) #0 {
+define amdgpu_kernel void @fdiv_f64_v_s(ptr addrspace(1) %out, ptr addrspace(1) %in, double %den) nounwind {
%num = load double, ptr addrspace(1) %in
%result = fdiv double %num, %den
store double %result, ptr addrspace(1) %out
@@ -84,14 +84,14 @@ define amdgpu_kernel void @fdiv_f64_v_s(ptr addrspace(1) %out, ptr addrspace(1)
}
; GCN-LABEL: {{^}}fdiv_f64_s_s:
-define amdgpu_kernel void @fdiv_f64_s_s(ptr addrspace(1) %out, double %num, double %den) #0 {
+define amdgpu_kernel void @fdiv_f64_s_s(ptr addrspace(1) %out, double %num, double %den) nounwind {
%result = fdiv double %num, %den
store double %result, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}v_fdiv_v2f64:
-define amdgpu_kernel void @v_fdiv_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fdiv_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%gep.1 = getelementptr <2 x double>, ptr addrspace(1) %in, i32 1
%num = load <2 x double>, ptr addrspace(1) %in
%den = load <2 x double>, ptr addrspace(1) %gep.1
@@ -108,7 +108,7 @@ define amdgpu_kernel void @s_fdiv_v2f64(ptr addrspace(1) %out, <2 x double> %num
}
; GCN-LABEL: {{^}}v_fdiv_v4f64:
-define amdgpu_kernel void @v_fdiv_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fdiv_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%gep.1 = getelementptr <4 x double>, ptr addrspace(1) %in, i32 1
%num = load <4 x double>, ptr addrspace(1) %in
%den = load <4 x double>, ptr addrspace(1) %gep.1
@@ -118,7 +118,7 @@ define amdgpu_kernel void @v_fdiv_v4f64(ptr addrspace(1) %out, ptr addrspace(1)
}
; GCN-LABEL: {{^}}s_fdiv_v4f64:
-define amdgpu_kernel void @s_fdiv_v4f64(ptr addrspace(1) %out, <4 x double> %num, <4 x double> %den) #0 {
+define amdgpu_kernel void @s_fdiv_v4f64(ptr addrspace(1) %out, <4 x double> %num, <4 x double> %den) nounwind {
%result = fdiv <4 x double> %num, %den
store <4 x double> %result, ptr addrspace(1) %out
ret void
@@ -127,7 +127,7 @@ define amdgpu_kernel void @s_fdiv_v4f64(ptr addrspace(1) %out, <4 x double> %num
; GCN-LABEL: {{^}}div_fast_2_x_pat_f64:
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0.5
; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="true" {
%x = load double, ptr addrspace(1) undef
%rcp = fdiv fast double %x, 2.0
store double %rcp, ptr addrspace(1) %out, align 4
@@ -139,7 +139,7 @@ define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #1 {
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0x3fb99999
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="true" {
%x = load double, ptr addrspace(1) undef
%rcp = fdiv fast double %x, 10.0
store double %rcp, ptr addrspace(1) %out, align 4
@@ -151,12 +151,9 @@ define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #1 {
; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0xbfb99999
; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="true" {
%x = load double, ptr addrspace(1) undef
%rcp = fdiv fast double %x, -10.0
store double %rcp, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
index 8155ac7eb256ec..9d0cd0ccd5c376 100644
--- a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
@@ -6,7 +6,7 @@
; Stores to the same address appear multiple places in the same
; block. When sorted by offset, the merges would fail. We should form
; two groupings of ds_write2_b64 on either side of the fence.
-define amdgpu_kernel void @same_address_fence_merge_write2() #0 {
+define amdgpu_kernel void @same_address_fence_merge_write2() nounwind readnone speculatable {
; GCN-LABEL: same_address_fence_merge_write2:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s0, 0
@@ -61,10 +61,7 @@ bb:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
!0 = !{i32 0, i32 1024}
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index c25aea1bd355e1..03d87165ce0995 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
@@ -51,6 +51,4 @@ entry:
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index 69ddd82307c588..c573d44bd3fb43 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
@@ -92,6 +92,4 @@ entry:
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { readnone }
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.ll b/llvm/test/CodeGen/AMDGPU/ffloor.ll
index dda5c16ff288c9..afa7ac92c54433 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.ll
@@ -6,7 +6,7 @@
; SI: v_floor_f32_e32
; R600: FLOOR
define amdgpu_kernel void @floor_f32(ptr addrspace(1) %out, float %in) {
- %tmp = call float @llvm.floor.f32(float %in) #0
+ %tmp = call float @llvm.floor.f32(float %in) nounwind readnone
store float %tmp, ptr addrspace(1) %out
ret void
}
@@ -16,7 +16,7 @@ define amdgpu_kernel void @floor_f32(ptr addrspace(1) %out, float %in) {
; SI: v_floor_f32_e32
define amdgpu_kernel void @floor_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
- %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0
+ %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) nounwind readnone
store <2 x float> %tmp, ptr addrspace(1) %out
ret void
}
@@ -32,18 +32,16 @@ define amdgpu_kernel void @floor_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; R600: FLOOR
; R600: FLOOR
define amdgpu_kernel void @floor_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
- %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) #0
+ %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) nounwind readnone
store <4 x float> %tmp, ptr addrspace(1) %out
ret void
}
; Function Attrs: nounwind readonly
-declare float @llvm.floor.f32(float) #0
+declare float @llvm.floor.f32(float) nounwind readnone
; Function Attrs: nounwind readonly
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) nounwind readnone
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
index 6ce3c68fce24e5..bea2ea3feee06c 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll
@@ -7,7 +7,7 @@
%struct.Data = type { [20 x i32] }
-define i32 @fp_save_restore_in_temp_sgpr(ptr addrspace(5) nocapture readonly byval(%struct.Data) align 4 %arg) #0 {
+define i32 @fp_save_restore_in_temp_sgpr(ptr addrspace(5) nocapture readonly byval(%struct.Data) align 4 %arg) norecurse nounwind "frame-pointer"="all" {
; GCN-LABEL: name: fp_save_restore_in_temp_sgpr
; GCN: bb.0.begin:
; GCN: liveins: $sgpr11
@@ -41,5 +41,3 @@ end: ; preds = %lp_end, %lp_begin
%ret_val = phi i32 [ 0, %lp_begin ], [ 1, %lp_end ]
ret i32 %ret_val
}
-
-attributes #0 = { norecurse nounwind "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll
index 8e0750195b3b4d..2d6e1a8870931a 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll
@@ -8,7 +8,7 @@
; at function entry, the FP-SP can't be statically determined with dynamic stack realignment. To
; fix the problem, use FP as the frame base in the spills whenever the function has FP.
-define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 {
+define void @test_stack_realign(<8 x i32> %val, i32 %idx) noinline nounwind {
; GCN-LABEL: test_stack_realign:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,6 +71,4 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 {
ret void
}
-declare void @extern_func(i32) #0
-
-attributes #0 = { noinline nounwind }
+declare void @extern_func(i32) noinline nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
index 9b20d9be278c6b..171b483c4431ad 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
@@ -76,11 +76,7 @@ work:
br i1 %tmp34, label %bb602, label %bb42
}
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i32 @llvm.amdgcn.wwm.i32(i32) #1
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #2
-
-attributes #0 = { convergent nounwind readnone willreturn }
-attributes #1 = { convergent nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind willreturn memory(argmem: write) }
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.wwm.i32(i32) convergent nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.strict.wwm.i32(i32) convergent nounwind readnone speculatable willreturn
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind willreturn memory(argmem: write)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
index 0ad53083d0ff3f..d0468e8f0bf9c0 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -13,7 +13,7 @@
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; GCN: flat_store_{{dword|b32}} v[[[LO_VREG]]:[[HI_VREG]]], v[[DATA]]
-define amdgpu_kernel void @store_flat_i32(ptr addrspace(1) %gptr, i32 %x) #0 {
+define amdgpu_kernel void @store_flat_i32(ptr addrspace(1) %gptr, i32 %x) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
store volatile i32 %x, ptr %fptr, align 4
ret void
@@ -21,7 +21,7 @@ define amdgpu_kernel void @store_flat_i32(ptr addrspace(1) %gptr, i32 %x) #0 {
; GCN-LABEL: {{^}}store_flat_i64:
; GCN: flat_store_{{dwordx2|b64}}
-define amdgpu_kernel void @store_flat_i64(ptr addrspace(1) %gptr, i64 %x) #0 {
+define amdgpu_kernel void @store_flat_i64(ptr addrspace(1) %gptr, i64 %x) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
store volatile i64 %x, ptr %fptr, align 8
ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @store_flat_i64(ptr addrspace(1) %gptr, i64 %x) #0 {
; GCN-LABEL: {{^}}store_flat_v4i32:
; GCN: flat_store_{{dwordx4|b128}}
-define amdgpu_kernel void @store_flat_v4i32(ptr addrspace(1) %gptr, <4 x i32> %x) #0 {
+define amdgpu_kernel void @store_flat_v4i32(ptr addrspace(1) %gptr, <4 x i32> %x) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
store volatile <4 x i32> %x, ptr %fptr, align 16
ret void
@@ -37,7 +37,7 @@ define amdgpu_kernel void @store_flat_v4i32(ptr addrspace(1) %gptr, <4 x i32> %x
; GCN-LABEL: {{^}}store_flat_trunc_i16:
; GCN: flat_store_{{short|b16}}
-define amdgpu_kernel void @store_flat_trunc_i16(ptr addrspace(1) %gptr, i32 %x) #0 {
+define amdgpu_kernel void @store_flat_trunc_i16(ptr addrspace(1) %gptr, i32 %x) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%y = trunc i32 %x to i16
store volatile i16 %y, ptr %fptr, align 2
@@ -46,7 +46,7 @@ define amdgpu_kernel void @store_flat_trunc_i16(ptr addrspace(1) %gptr, i32 %x)
; GCN-LABEL: {{^}}store_flat_trunc_i8:
; GCN: flat_store_{{byte|b8}}
-define amdgpu_kernel void @store_flat_trunc_i8(ptr addrspace(1) %gptr, i32 %x) #0 {
+define amdgpu_kernel void @store_flat_trunc_i8(ptr addrspace(1) %gptr, i32 %x) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%y = trunc i32 %x to i8
store volatile i8 %y, ptr %fptr, align 2
@@ -57,7 +57,7 @@ define amdgpu_kernel void @store_flat_trunc_i8(ptr addrspace(1) %gptr, i32 %x) #
; GCN-LABEL: load_flat_i32:
; GCN: flat_load_{{dword|b32}}
-define amdgpu_kernel void @load_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @load_flat_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i32, ptr %fptr, align 4
store i32 %fload, ptr addrspace(1) %out, align 4
@@ -66,7 +66,7 @@ define amdgpu_kernel void @load_flat_i32(ptr addrspace(1) noalias %out, ptr addr
; GCN-LABEL: load_flat_i64:
; GCN: flat_load_{{dwordx2|b64}}
-define amdgpu_kernel void @load_flat_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @load_flat_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i64, ptr %fptr, align 8
store i64 %fload, ptr addrspace(1) %out, align 8
@@ -75,7 +75,7 @@ define amdgpu_kernel void @load_flat_i64(ptr addrspace(1) noalias %out, ptr addr
; GCN-LABEL: load_flat_v4i32:
; GCN: flat_load_{{dwordx4|b128}}
-define amdgpu_kernel void @load_flat_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @load_flat_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile <4 x i32>, ptr %fptr, align 32
store <4 x i32> %fload, ptr addrspace(1) %out, align 8
@@ -84,7 +84,7 @@ define amdgpu_kernel void @load_flat_v4i32(ptr addrspace(1) noalias %out, ptr ad
; GCN-LABEL: sextload_flat_i8:
; GCN: flat_load_{{sbyte|i8}}
-define amdgpu_kernel void @sextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @sextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i8, ptr %fptr, align 4
%ext = sext i8 %fload to i32
@@ -94,7 +94,7 @@ define amdgpu_kernel void @sextload_flat_i8(ptr addrspace(1) noalias %out, ptr a
; GCN-LABEL: zextload_flat_i8:
; GCN: flat_load_{{ubyte|u8}}
-define amdgpu_kernel void @zextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @zextload_flat_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i8, ptr %fptr, align 4
%ext = zext i8 %fload to i32
@@ -104,7 +104,7 @@ define amdgpu_kernel void @zextload_flat_i8(ptr addrspace(1) noalias %out, ptr a
; GCN-LABEL: sextload_flat_i16:
; GCN: flat_load_{{sshort|i16}}
-define amdgpu_kernel void @sextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @sextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i16, ptr %fptr, align 4
%ext = sext i16 %fload to i32
@@ -114,7 +114,7 @@ define amdgpu_kernel void @sextload_flat_i16(ptr addrspace(1) noalias %out, ptr
; GCN-LABEL: zextload_flat_i16:
; GCN: flat_load_{{ushort|u16}}
-define amdgpu_kernel void @zextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) #0 {
+define amdgpu_kernel void @zextload_flat_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %gptr) nounwind {
%fptr = addrspacecast ptr addrspace(1) %gptr to ptr
%fload = load volatile i16, ptr %fptr, align 4
%ext = zext i16 %fload to i32
@@ -198,7 +198,7 @@ define void @flat_scratch_multidword_store_func(ptr %maybe.scratch) {
; GCN-LABEL: {{^}}store_flat_i8_max_offset:
; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset(ptr %fptr, i8 %x) #0 {
+define amdgpu_kernel void @store_flat_i8_max_offset(ptr %fptr, i8 %x) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095
store volatile i8 %x, ptr %fptr.offset
ret void
@@ -206,7 +206,7 @@ define amdgpu_kernel void @store_flat_i8_max_offset(ptr %fptr, i8 %x) #0 {
; GCN-LABEL: {{^}}store_flat_i8_max_offset_p1:
; GCN: flat_store_{{byte|b8}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{( dlc)?}}{{$}}
-define amdgpu_kernel void @store_flat_i8_max_offset_p1(ptr %fptr, i8 %x) #0 {
+define amdgpu_kernel void @store_flat_i8_max_offset_p1(ptr %fptr, i8 %x) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096
store volatile i8 %x, ptr %fptr.offset
ret void
@@ -218,7 +218,7 @@ define amdgpu_kernel void @store_flat_i8_max_offset_p1(ptr %fptr, i8 %x) #0 {
; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) #0 {
+define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 -2
store volatile i8 %x, ptr %fptr.offset
ret void
@@ -229,7 +229,7 @@ define amdgpu_kernel void @store_flat_i8_neg_offset(ptr %fptr, i8 %x) #0 {
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc{{$}}
; GFX10: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}}
; GFX11: flat_load_u8 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095 glc dlc{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 {
+define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4095
%val = load volatile i8, ptr %fptr.offset
ret void
@@ -239,7 +239,7 @@ define amdgpu_kernel void @load_flat_i8_max_offset(ptr %fptr) #0 {
; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GFX10PLUS: flat_load_{{ubyte|u8}} v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc dlc{{$}}
-define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) #0 {
+define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 4096
%val = load volatile i8, ptr %fptr.offset
ret void
@@ -251,11 +251,8 @@ define amdgpu_kernel void @load_flat_i8_max_offset_p1(ptr %fptr) #0 {
; GFX9: v_add_co_u32_e64 v{{[0-9]+}}, vcc, -2, s
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1,
; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @load_flat_i8_neg_offset(ptr %fptr) #0 {
+define amdgpu_kernel void @load_flat_i8_neg_offset(ptr %fptr) nounwind {
%fptr.offset = getelementptr inbounds i8, ptr %fptr, i64 -2
%val = load volatile i8, ptr %fptr.offset
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
index ce4beb8789dc3d..967b646e352586 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
@@ -60,7 +60,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
ret float %ret
}
-define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 {
+define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
; GFX940: bb.0 (%ir-block.0):
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -88,7 +88,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
ret void
}
-define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 {
+define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
; GFX940: bb.0 (%ir-block.0):
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -119,5 +119,3 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data
}
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
index afd38136870b86..25e30d9864ca9b 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
@@ -44,7 +44,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %da
ret double %ret
}
-define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 {
+define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -63,7 +63,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
ret void
}
-define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 {
+define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -87,5 +87,3 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
}
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index b9583a73295e26..8dcb7720604076 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_ARCH %s
-declare void @extern_func() #0
+declare void @extern_func() nounwind
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
; FLAT_SCR_OPT-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
@@ -415,7 +415,5 @@ define amdgpu_kernel void @kernel_no_calls_no_stack() {
ret void
}
-attributes #0 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 1633d21c41d5ca..d0adf33a5d001a 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -101,7 +101,7 @@ entry:
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
-define amdgpu_kernel void @use_flat_scr() #0 {
+define amdgpu_kernel void @use_flat_scr() nounwind {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch}"()
ret void
@@ -118,7 +118,7 @@ entry:
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
-define amdgpu_kernel void @use_flat_scr_lo() #0 {
+define amdgpu_kernel void @use_flat_scr_lo() nounwind {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"()
ret void
@@ -135,13 +135,11 @@ entry:
; VI-XNACK: NumSgprs: 6
; GFX9-ARCH-FLAT: ; NumSgprs: 6
; GFX10-ARCH-FLAT: ; NumSgprs: 0
-define amdgpu_kernel void @use_flat_scr_hi() #0 {
+define amdgpu_kernel void @use_flat_scr_hi() nounwind {
entry:
call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"()
ret void
}
-attributes #0 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index bac2d8b8b40c26..23e0b49e33f5b1 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -11,14 +11,14 @@
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
; for correctness.
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.fma.f64(double, double, double) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; (fadd (fmul x, y), z) -> (fma x, y, z)
-define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_f64_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -55,7 +55,7 @@ define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, p
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -72,7 +72,7 @@ define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, p
}
; (fadd (fmul x, y), z) -> (fma x, y, z)
-define amdgpu_kernel void @combine_to_fma_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_f64_0_2use:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -121,7 +121,7 @@ define amdgpu_kernel void @combine_to_fma_f64_0_2use(ptr addrspace(1) noalias %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -143,7 +143,7 @@ define amdgpu_kernel void @combine_to_fma_f64_0_2use(ptr addrspace(1) noalias %o
}
; (fadd x, (fmul y, z)) -> (fma y, z, x)
-define amdgpu_kernel void @combine_to_fma_f64_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_f64_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -180,7 +180,7 @@ define amdgpu_kernel void @combine_to_fma_f64_1(ptr addrspace(1) noalias %out, p
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -197,7 +197,7 @@ define amdgpu_kernel void @combine_to_fma_f64_1(ptr addrspace(1) noalias %out, p
}
; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
-define amdgpu_kernel void @combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_0_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -234,7 +234,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -251,7 +251,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %o
}
; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
-define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_f64_0_2use:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -300,7 +300,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(ptr addrspace(1) noali
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -322,7 +322,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(ptr addrspace(1) noali
}
; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
-define amdgpu_kernel void @combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_1_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -359,7 +359,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -376,7 +376,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %o
}
; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
-define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_1_f64_2use:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -425,7 +425,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(ptr addrspace(1) noali
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -447,7 +447,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(ptr addrspace(1) noali
}
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_2_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -484,7 +484,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64(ptr addrspace(1) noalias %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -503,7 +503,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64(ptr addrspace(1) noalias %o
}
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_2_f64_2uses_neg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -552,7 +552,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -576,7 +576,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(ptr addrspace(1)
}
; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: combine_to_fma_fsub_2_f64_2uses_mul:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -625,7 +625,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -649,7 +649,7 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(ptr addrspace(1)
}
; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z)))
-define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-NOFMA-LABEL: aggressive_combine_to_fma_fsub_0_f64:
; SI-NOFMA: ; %bb.0:
; SI-NOFMA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -747,7 +747,7 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1)
; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -762,7 +762,7 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1)
%v = load volatile double, ptr addrspace(1) %gep.4
%tmp0 = fmul double %u, %v
- %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
+ %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) nounwind readnone
%tmp2 = fsub double %tmp1, %z
store double %tmp2, ptr addrspace(1) %gep.out
@@ -771,7 +771,7 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1)
; fold (fsub x, (fma y, z, (fmul u, v)))
; -> (fma (fneg y), z, (fma (fneg u), v, x))
-define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-NOFMA-LABEL: aggressive_combine_to_fma_fsub_1_f64:
; SI-NOFMA: ; %bb.0:
; SI-NOFMA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -869,7 +869,7 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(ptr addrspace(1)
; GFX11-FMA-NEXT: s_nop 0
; GFX11-FMA-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FMA-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
@@ -885,7 +885,7 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(ptr addrspace(1)
; nsz flag is needed since this combine may change sign of zero
%tmp0 = fmul nsz double %u, %v
- %tmp1 = call nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
+ %tmp1 = call nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) nounwind readnone
%tmp2 = fsub nsz double %x, %tmp1
store double %tmp2, ptr addrspace(1) %gep.out
@@ -2217,7 +2217,7 @@ define amdgpu_kernel void @test_f64_interp(ptr addrspace(1) %out,
}
; Make sure negative constant cancels out fneg
-define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind readnone {
; SI-LABEL: fma_neg_2.0_neg_a_b_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2263,7 +2263,7 @@ define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @fma_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fma_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind readnone {
; SI-LABEL: fma_2.0_neg_a_b_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2309,7 +2309,7 @@ define amdgpu_kernel void @fma_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @fma_neg_b_c_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
+define amdgpu_kernel void @fma_neg_b_c_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="true" {
; SI-LABEL: fma_neg_b_c_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2366,7 +2366,3 @@ define amdgpu_kernel void @fma_neg_b_c_v4f32(ptr addrspace(1) %out, ptr addrspac
store <4 x float> %fma0, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index 4f49d4ce3a040a..240795fc8a2f2f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}addMul2D:
; GFX1010: v_fmac_f16
; GFX1010: v_fmac_f16
-define hidden <4 x half> @addMul2D(ptr nocapture readonly %arg, ptr addrspace(4) nocapture readonly %arg1, <2 x i32> %arg2, i32 %arg3) local_unnamed_addr #0 {
+define hidden <4 x half> @addMul2D(ptr nocapture readonly %arg, ptr addrspace(4) nocapture readonly %arg1, <2 x i32> %arg2, i32 %arg3) local_unnamed_addr convergent nounwind readonly {
bb:
%tmp = extractelement <2 x i32> %arg2, i64 1
%tmp4 = icmp sgt i32 %tmp, 0
@@ -76,7 +76,7 @@ bb36: ; preds = %bb32, %bb
}
; Function Attrs: norecurse nounwind readnone
-define linkonce_odr hidden <4 x half> @_Z13convert_half4Dv4_h(<4 x i8> %arg) local_unnamed_addr #1 {
+define linkonce_odr hidden <4 x half> @_Z13convert_half4Dv4_h(<4 x i8> %arg) local_unnamed_addr norecurse nounwind readnone {
bb:
%tmp = extractelement <4 x i8> %arg, i64 0
%tmp1 = uitofp i8 %tmp to half
@@ -94,6 +94,3 @@ bb:
}
declare half @llvm.fmuladd.f16(half, half, half)
-
-attributes #0 = { convergent nounwind readonly}
-attributes #1 = { norecurse nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
index 25776bb8380ae5..911ac0cf32be04 100644
--- a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
@@ -17,7 +17,7 @@
; Check for incorrect fmad formation when distributing
-define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
+define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,7 +59,7 @@ define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0
ret float %tmp1
}
-define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
+define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -101,7 +101,7 @@ define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0
ret float %tmp1
}
-define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
+define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -149,7 +149,7 @@ define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2
ret <2 x float> %tmp1
}
-define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
+define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,7 +198,7 @@ define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2
ret <2 x float> %tmp1
}
-define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
+define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -241,7 +241,7 @@ define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %ar
ret <2 x float> %tmp1
}
-define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
+define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) "no-infs-fp-math"="true" "unsafe-fp-math"="true" {
; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -283,5 +283,3 @@ define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <
%tmp1 = fmul fast <2 x float> %arg1, %splat
ret <2 x float> %tmp1
}
-
-attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index 23eb73038917d2..30c16d5abebceb 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmax3_olt_0_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -136,7 +136,7 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
}
; Commute operand of second fmax
-define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmax3_olt_1_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -267,7 +267,7 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmax3_olt_0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -407,7 +407,7 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
}
; Commute operand of second fmax
-define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmax3_olt_1_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -548,7 +548,7 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; Checks whether the test passes; performMinMaxCombine() should not optimize vector patterns of max3
; since there are no pack instructions for fmax3.
-define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #2 {
+define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) nounwind "no-nans-fp-math"="true" {
; SI-LABEL: no_fmax3_v2f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -609,11 +609,7 @@ entry:
ret <2 x half> %res
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare half @llvm.maxnum.f16(half, half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare float @llvm.maxnum.f32(float, float) nounwind readnone speculatable
+declare half @llvm.maxnum.f16(half, half) nounwind readnone speculatable
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index e874ee56f594ca..fd19e361d28885 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -11,7 +11,7 @@
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s
-define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
+define half @test_fmax_legacy_ugt_f16(half %a, half %b) nounwind {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -75,7 +75,7 @@ define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
ret half %val
}
-define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
+define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -169,7 +169,7 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
ret <2 x half> %val
}
-define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
+define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -282,7 +282,7 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
ret <3 x half> %val
}
-define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -424,7 +424,7 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %val
}
-define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -659,5 +659,3 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
%val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
ret <8 x half> %val
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
index 01b2f207388e8a..30ddf66bf6fd6f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
@@ -4,7 +4,7 @@
; Make sure we don't try to form FMAX_LEGACY nodes with f64
-define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmax_legacy_uge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -43,7 +43,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -56,7 +56,7 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmax_legacy_oge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -95,7 +95,7 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -108,7 +108,7 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmax_legacy_ugt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -147,7 +147,7 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -160,7 +160,7 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmax_legacy_ogt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -199,7 +199,7 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -212,7 +212,4 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr a
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index 1da621cb9f09de..c97fdbc2c6b201 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -6,7 +6,7 @@
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
@@ -20,8 +20,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -48,8 +48,8 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr a
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -75,8 +75,8 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %o
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -101,8 +101,8 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr a
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -126,8 +126,8 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr a
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -152,8 +152,8 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr a
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1
@@ -185,8 +185,8 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
; GCN-NONAN: v_max_f32_e32
; GCN-NOT: v_max
-define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1
@@ -208,8 +208,8 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
; GCN-NOT: v_max_
; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -222,6 +222,3 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(ptr addrspace(1) %
store i1 %cmp, ptr addrspace(1) %out1
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
index 34e1cb89607b98..1c37bef5a7b9c2 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
@@ -1,16 +1,16 @@
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare double @llvm.maxnum.f64(double, double) #0
-declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
-declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) #0
-declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
-declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) nounwind readnone
+declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) nounwind readnone
+declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) nounwind readnone
; FUNC-LABEL: @test_fmax_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_f64(ptr addrspace(1) %out, double %a, double %b) nounwind {
- %val = call double @llvm.maxnum.f64(double %a, double %b) #0
+ %val = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
store double %val, ptr addrspace(1) %out, align 8
ret void
}
@@ -19,7 +19,7 @@ define amdgpu_kernel void @test_fmax_f64(ptr addrspace(1) %out, double %a, doubl
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind {
- %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+ %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) nounwind readnone
store <2 x double> %val, ptr addrspace(1) %out, align 16
ret void
}
@@ -30,7 +30,7 @@ define amdgpu_kernel void @test_fmax_v2f64(ptr addrspace(1) %out, <2 x double> %
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind {
- %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+ %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) nounwind readnone
store <4 x double> %val, ptr addrspace(1) %out, align 32
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_kernel void @test_fmax_v4f64(ptr addrspace(1) %out, <4 x double> %
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind {
- %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+ %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) nounwind readnone
store <8 x double> %val, ptr addrspace(1) %out, align 64
ret void
}
@@ -68,9 +68,7 @@ define amdgpu_kernel void @test_fmax_v8f64(ptr addrspace(1) %out, <8 x double> %
; SI: v_max_f64
; SI: v_max_f64
define amdgpu_kernel void @test_fmax_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind {
- %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+ %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) nounwind readnone
store <16 x double> %val, ptr addrspace(1) %out, align 128
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 38640a18b5aee6..9df7fd5eda7a81 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -7,8 +7,8 @@
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float %b) #1
+define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -16,15 +16,15 @@ define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(ptr addrspace(1) %out, flo
; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_off:
; GCN: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
-define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float %b) #1
+define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}test_fmax_v2f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
define amdgpu_kernel void @test_fmax_v3f32(ptr addrspace(1) %out, <3 x float> %a, <3 x float> %b) nounwind {
- %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+ %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
store <3 x float> %val, ptr addrspace(1) %out, align 16
ret void
}
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_fmax_v3f32(ptr addrspace(1) %out, <3 x float> %a
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
@@ -61,7 +61,7 @@ define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
@@ -94,7 +94,7 @@ define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float>
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -104,7 +104,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -114,7 +114,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out)
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -124,7 +124,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out)
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -134,7 +134,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out)
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -144,7 +144,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -154,7 +154,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #
; GCN-NOT: v_max_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -164,7 +164,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #
; GCN-NOT: v_max_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -172,31 +172,31 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}fmax_var_immediate_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-define amdgpu_ps float @fmax_var_immediate_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
+define amdgpu_ps float @fmax_var_immediate_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float %a, float 2.0) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %val
}
; GCN-LABEL: {{^}}fmax_immediate_var_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-define amdgpu_ps float @fmax_immediate_var_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
+define amdgpu_ps float @fmax_immediate_var_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float 2.0, float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %val
}
; GCN-LABEL: {{^}}fmax_var_literal_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_ps float @fmax_var_literal_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
+define amdgpu_ps float @fmax_var_literal_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float %a, float 99.0) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %val
}
; GCN-LABEL: {{^}}fmax_literal_var_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
+define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.maxnum.f32(float 99.0, float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %val
}
@@ -205,18 +205,15 @@ define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
; GCN-NOT: v_max_f32
-define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
- %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret <3 x float> %val
}
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) nounwind readnone
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) nounwind readnone
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) nounwind readnone
declare double @llvm.maxnum.f64(double, double)
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
index f5c55ffef30a15..2dd3a75fb82640 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -3,7 +3,7 @@
; EG-LABEL: {{^}}test_fmax_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
@@ -25,7 +25,7 @@ define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
@@ -42,7 +42,7 @@ define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
@@ -69,7 +69,7 @@ define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
@@ -79,7 +79,7 @@ define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float>
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -90,7 +90,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344(nan)
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -100,7 +100,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -110,7 +110,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -120,7 +120,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -130,7 +130,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -140,7 +140,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -150,7 +150,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -159,7 +159,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #
; EG-LABEL: {{^}}fmax_var_immediate_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 * [[OUT]]
-define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -168,7 +168,7 @@ define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %
; EG-LABEL: {{^}}fmax_immediate_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -177,7 +177,7 @@ define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %
; EG-LABEL: {{^}}fmax_var_literal_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -186,18 +186,15 @@ define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a)
; EG-LABEL: {{^}}fmax_literal_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a)
store float %val, ptr addrspace(1) %out, align 4
ret void
}
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) nounwind readnone
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) nounwind readnone
declare double @llvm.maxnum.f64(double, double)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
index 99b163dc9753b7..e4d8f91bbf1d57 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
@@ -12,11 +12,11 @@
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
-define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -70,7 +70,7 @@ define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -124,7 +124,7 @@ define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -179,7 +179,7 @@ define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, p
ret half %cast
}
-define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -230,7 +230,7 @@ define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,7 +281,7 @@ define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 {
+define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -332,7 +332,7 @@ define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -385,7 +385,7 @@ define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 {
+define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -433,7 +433,7 @@ define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -496,7 +496,7 @@ define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_fabs_f32_fpext_f16:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -545,7 +545,7 @@ define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -602,7 +602,7 @@ define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_fneg_f32_fpext_f16:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -651,7 +651,7 @@ define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -717,7 +717,7 @@ define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #
ret half %cast
}
-define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -772,7 +772,7 @@ define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #
; Negative tests
; --------------------------------------------------------------------------------
-define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 {
+define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -819,7 +819,7 @@ define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %ar
ret bfloat %cast
}
-define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -887,7 +887,7 @@ define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2,
ret half %cast
}
-define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -954,7 +954,7 @@ define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2,
ret half %cast
}
-define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
+define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1021,7 +1021,7 @@ define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2,
 ret half %cast
}
-define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
+define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-LABEL: fmed3_f32_fpext_bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1057,7 +1057,7 @@ define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_0:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1093,7 +1093,7 @@ define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1
ret half %cast
}
-define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1129,7 +1129,7 @@ define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1
ret half %cast
}
-define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 {
+define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1165,7 +1165,7 @@ define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1
ret half %cast
}
-define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1231,7 +1231,7 @@ define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 {
+define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1297,7 +1297,7 @@ define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 {
ret half %cast
}
-define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 {
+define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) nocallback nofree nosync nounwind speculatable willreturn {
; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
; GFX7-SDAG: ; %bb.0:
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1362,8 +1362,5 @@ define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 {
%cast = fptrunc float %med3 to half
ret half %cast
}
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 764fb992d4d34c..9973a7e234c9f9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -8,7 +8,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL %s
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -119,7 +119,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -231,7 +231,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -343,7 +343,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -455,7 +455,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -572,7 +572,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -744,7 +744,7 @@ define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -862,7 +862,7 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
+define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -966,7 +966,7 @@ define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1120,7 +1120,7 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspa
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspa
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -1690,7 +1690,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspa
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -1890,7 +1890,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrs
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2096,7 +2096,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrs
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2271,7 +2271,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2423,7 +2423,7 @@ define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2575,7 +2575,7 @@ define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2739,7 +2739,7 @@ define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr
; 7: max(min(y, x), min(z, max(y, x)))
; + commute outermost max
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -2891,7 +2891,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3043,7 +3043,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3233,7 +3233,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspa
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3385,7 +3385,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3537,7 +3537,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3689,7 +3689,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3841,7 +3841,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -3993,7 +3993,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4145,7 +4145,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4297,7 +4297,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4449,7 +4449,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4601,7 +4601,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4753,7 +4753,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -4905,7 +4905,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5057,7 +5057,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5209,7 +5209,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5364,7 +5364,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %
; Also handle `min` at the root:
; min(max(x, y), max(min(x, y), z))
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5520,7 +5520,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %
; Negative patterns
; ---------------------------------------------------------------------
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5722,7 +5722,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -5948,7 +5948,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -6150,7 +6150,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -6356,7 +6356,7 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -6531,7 +6531,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -6706,7 +6706,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -6881,7 +6881,7 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -7071,7 +7071,7 @@ define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspa
ret void
}
-define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -7283,7 +7283,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(pt
}
; A simple min and max is not sufficient
-define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
+define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; SI-SDAG-LABEL: v_test_global_nnans_min_max_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -7438,7 +7438,7 @@ define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -7563,7 +7563,7 @@ define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
+define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -7771,7 +7771,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: two_non_inline_constant:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -7905,7 +7905,7 @@ define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr ad
}
; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
-define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: one_non_inline_constant:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -8044,7 +8044,7 @@ define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" {
; SI-SDAG-LABEL: two_non_inline_constant_multi_use:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -8250,18 +8250,14 @@ define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %o
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare double @llvm.minnum.f64(double, double) #0
-declare double @llvm.maxnum.f64(double, double) #0
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.minnum.f16(half, half) #0
-declare half @llvm.maxnum.f16(half, half) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SI: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 7337d90b4bea63..c41bf133d59ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_0_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -136,7 +136,7 @@ define amdgpu_kernel void @test_fmin3_olt_0_f32(ptr addrspace(1) %out, ptr addrs
}
; Commute operand of second fmin
-define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_1_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -267,7 +267,7 @@ define amdgpu_kernel void @test_fmin3_olt_1_f32(ptr addrspace(1) %out, ptr addrs
ret void
}
-define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -407,7 +407,7 @@ define amdgpu_kernel void @test_fmin3_olt_0_f16(ptr addrspace(1) %out, ptr addrs
}
; Commute operand of second fmin
-define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_1_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -548,7 +548,7 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(ptr addrspace(1) %out, ptr addrs
; Checks whether the test passes; performMinMaxCombine() should not optimize vector patterns of min3
; since there are no pack instructions for fmin3.
-define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #2 {
+define <2 x half> @no_fmin3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) nounwind "no-nans-fp-math"="true" {
; SI-LABEL: no_fmin3_v2f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -609,7 +609,7 @@ entry:
ret <2 x half> %res
}
-define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_0_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -756,7 +756,7 @@ define amdgpu_kernel void @test_fmin3_olt_0_f64(ptr addrspace(1) %out, ptr addrs
}
; Commute operand of second fmin
-define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
; SI-LABEL: test_fmin3_olt_1_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -902,12 +902,8 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.minnum.f64(double, double) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare half @llvm.minnum.f16(half, half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare double @llvm.minnum.f64(double, double) nounwind readnone speculatable
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
+declare half @llvm.minnum.f16(half, half) nounwind readnone speculatable
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll
index 2ac5891773d739..4da71a261342cc 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll
@@ -11,7 +11,7 @@
; VI-SAFE: v_cmp_nle_f32_e32 vcc, 1.0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -1.0, -v0, vcc
-define amdgpu_ps float @min_fneg_select_regression_0(float %a, float %b) #0 {
+define amdgpu_ps float @min_fneg_select_regression_0(float %a, float %b) nounwind {
%fneg.a = fsub float -0.0, %a
%cmp.a = fcmp ult float %a, 1.0
%min.a = select i1 %cmp.a, float %fneg.a, float -1.0
@@ -27,7 +27,7 @@ define amdgpu_ps float @min_fneg_select_regression_0(float %a, float %b) #0 {
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; VI-NNAN: v_max_f32_e64 v{{[0-9]+}}, -v0, 1.0
-define amdgpu_ps float @min_fneg_select_regression_posk_0(float %a, float %b) #0 {
+define amdgpu_ps float @min_fneg_select_regression_posk_0(float %a, float %b) nounwind {
%fneg.a = fsub float -0.0, %a
%cmp.a = fcmp ult float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -43,7 +43,7 @@ define amdgpu_ps float @min_fneg_select_regression_posk_0(float %a, float %b) #0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -1.0, -v0, vcc
; GCN-NONAN: v_min_f32_e64 [[MIN:v[0-9]+]], -v0, -1.0
-define amdgpu_ps float @max_fneg_select_regression_0(float %a) #0 {
+define amdgpu_ps float @max_fneg_select_regression_0(float %a) nounwind {
%fneg.a = fsub float -0.0, %a
%cmp.a = fcmp ugt float %a, 1.0
%min.a = select i1 %cmp.a, float %fneg.a, float -1.0
@@ -59,7 +59,7 @@ define amdgpu_ps float @max_fneg_select_regression_0(float %a) #0 {
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; GCN-NONAN: v_min_f32_e64 [[MIN:v[0-9]+]], -v0, 1.0
-define amdgpu_ps float @max_fneg_select_regression_posk_0(float %a) #0 {
+define amdgpu_ps float @max_fneg_select_regression_posk_0(float %a) nounwind {
%fneg.a = fsub float -0.0, %a
%cmp.a = fcmp ugt float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -73,7 +73,7 @@ define amdgpu_ps float @max_fneg_select_regression_posk_0(float %a) #0 {
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; VI-NNAN: v_min_f32_e64 v0, -v0, 1.0
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg1(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg1(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ugt float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -87,7 +87,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg1(float %a, float %b) #0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; VI-NNAN: v_max_f32_e64 v0, -v0, 1.0
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg1(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg1(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ult float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -101,7 +101,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg1(float %a, float %b) #0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; VI-NNAN: v_min_f32_e64 v0, -v0, 1.0
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg1(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg1(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ogt float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -115,7 +115,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg1(float %a, float %b) #0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, 1.0, -v0, vcc
; VI-NANN: v_max_f32_e64 v0, -v0, 1.0
-define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg1(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg1(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp olt float %a, -1.0
%min.a = select i1 %cmp.a, float %fneg.a, float 1.0
@@ -133,7 +133,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg1(float %a, float %b) #0
; VI-NNAN: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; VI-NNAN-NEXT: v_min_f32_e64 v0, -v0, [[K]]
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg8(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg8(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ugt float %a, -8.0
%min.a = select i1 %cmp.a, float %fneg.a, float 8.0
@@ -151,7 +151,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ugt_a_neg8(float %a, float %b) #0
; VI-NNAN: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; VI-NNAN-NEXT: v_max_f32_e64 v0, -v0, [[K]]
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg8(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg8(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ult float %a, -8.0
%min.a = select i1 %cmp.a, float %fneg.a, float 8.0
@@ -169,7 +169,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ult_a_neg8(float %a, float %b) #0
; VI-NNAN: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; VI-NNAN-NEXT: v_min_f32_e64 v0, -v0, [[K]]
-define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg8(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg8(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ogt float %a, -8.0
%min.a = select i1 %cmp.a, float %fneg.a, float 8.0
@@ -188,7 +188,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_ogt_a_neg8(float %a, float %b) #0
; VI-NNAN: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; VI-NNAN-NEXT: v_max_f32_e64 v0, -v0, [[K]]
-define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg8(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg8(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp olt float %a, -8.0
%min.a = select i1 %cmp.a, float %fneg.a, float 8.0
@@ -202,7 +202,7 @@ define amdgpu_ps float @select_fneg_a_or_q_cmp_olt_a_neg8(float %a, float %b) #0
; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -1.0, -v0, vcc
; VI-NNAN: v_max_f32_e64 v0, -v0, -1.0
-define amdgpu_ps float @select_fneg_a_or_neg1_cmp_olt_a_1(float %a, float %b) #0 {
+define amdgpu_ps float @select_fneg_a_or_neg1_cmp_olt_a_1(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp olt float %a, 1.0
%min.a = select i1 %cmp.a, float %fneg.a, float -1.0
@@ -218,7 +218,7 @@ define amdgpu_ps float @select_fneg_a_or_neg1_cmp_olt_a_1(float %a, float %b) #0
; VI-NNAN: v_cmp_lt_f32_e32 vcc, v0, v1
; VI-NNAN-NEXT: v_cndmask_b32_e64 v0, v1, -v0, vcc
-define amdgpu_ps float @ult_a_select_fneg_a_b(float %a, float %b) #0 {
+define amdgpu_ps float @ult_a_select_fneg_a_b(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ult float %a, %b
%min.a = select i1 %cmp.a, float %fneg.a, float %b
@@ -234,12 +234,9 @@ define amdgpu_ps float @ult_a_select_fneg_a_b(float %a, float %b) #0 {
; VI-NNAN: v_cmp_gt_f32_e32 vcc, v0, v1
; VI-NNAN-NEXT: v_cndmask_b32_e64 v0, v1, -v0, vcc
-define amdgpu_ps float @ugt_a_select_fneg_a_b(float %a, float %b) #0 {
+define amdgpu_ps float @ugt_a_select_fneg_a_b(float %a, float %b) nounwind {
%fneg.a = fneg float %a
%cmp.a = fcmp ugt float %a, %b
%min.a = select i1 %cmp.a, float %fneg.a, float %b
ret float %min.a
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index 0723290bdf734d..bf4e2fbcecc6f0 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -12,7 +12,7 @@
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-NNAN %s
-define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
+define half @test_fmin_legacy_ule_f16(half %a, half %b) nounwind {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -76,7 +76,7 @@ define half @test_fmin_legacy_ule_f16(half %a, half %b) #0 {
ret half %val
}
-define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
+define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -170,7 +170,7 @@ define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
ret <2 x half> %val
}
-define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
+define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -283,7 +283,7 @@ define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
ret <3 x half> %val
}
-define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
+define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -425,7 +425,7 @@ define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
ret <4 x half> %val
}
-define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
+define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) nounwind {
; GFX9-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -660,5 +660,3 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
%val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
ret <8 x half> %val
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
index d20c39d5103649..8c177c9fd02149 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
-define amdgpu_kernel void @test_fmin_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_uge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_fmin_legacy_uge_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -54,7 +54,7 @@ define amdgpu_kernel void @test_fmin_legacy_uge_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_ugt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -93,7 +93,7 @@ define amdgpu_kernel void @test_fmin_legacy_ugt_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -106,7 +106,7 @@ define amdgpu_kernel void @test_fmin_legacy_ugt_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_ule_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_ule_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_ule_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -145,7 +145,7 @@ define amdgpu_kernel void @test_fmin_legacy_ule_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -158,7 +158,7 @@ define amdgpu_kernel void @test_fmin_legacy_ule_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_ult_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_ult_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_ult_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -197,7 +197,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -210,7 +210,7 @@ define amdgpu_kernel void @test_fmin_legacy_ult_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_oge_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -249,7 +249,7 @@ define amdgpu_kernel void @test_fmin_legacy_oge_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -262,7 +262,7 @@ define amdgpu_kernel void @test_fmin_legacy_oge_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_ogt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -301,7 +301,7 @@ define amdgpu_kernel void @test_fmin_legacy_ogt_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -314,7 +314,7 @@ define amdgpu_kernel void @test_fmin_legacy_ogt_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_ole_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_ole_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_ole_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -353,7 +353,7 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -366,7 +366,7 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @test_fmin_legacy_olt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_fmin_legacy_olt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_fmin_legacy_olt_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -405,7 +405,7 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f64(ptr addrspace(1) %out, ptr a
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -418,7 +418,4 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f64(ptr addrspace(1) %out, ptr a
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 8e595a827c78d7..52243b7aa6d20e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -6,7 +6,7 @@
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; The two inputs to the instruction are different SGPRs from the same
; super register, so we can't fold both SGPR operands even though they
@@ -21,7 +21,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; VI-SAFE: v_cmp_nlt_f32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(ptr addrspace(1) %out, <4 x float> %reg0) #0 {
+define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(ptr addrspace(1) %out, <4 x float> %reg0) nounwind {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fcmp uge float %r0, %r1
@@ -46,7 +46,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(ptr addrspace(1)
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[VB]], [[VA]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, s[[#LOAD + 2]], [[VB]]
-define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%cmp = fcmp ule float %a, %b
%val = select i1 %cmp, float %a, float %b
store float %val, ptr addrspace(1) %out, align 4
@@ -67,7 +67,7 @@ define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(ptr addrspace(1) %out, flo
; VI-SAFE: v_cndmask_b32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]], vcc
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
-define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(ptr addrspace(1) %out, float %a, float %b) nounwind {
%a.nnan = fadd nnan float %a, 1.0
%b.nnan = fadd nnan float %b, 2.0
%cmp = fcmp ule float %a.nnan, %b.nnan
@@ -86,8 +86,8 @@ define amdgpu_kernel void @s_test_fmin_legacy_ule_f32_nnan_src(ptr addrspace(1)
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @test_fmin_legacy_ule_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ule_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -110,8 +110,8 @@ define amdgpu_kernel void @test_fmin_legacy_ule_f32(ptr addrspace(1) %out, ptr a
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @test_fmin_legacy_ole_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ole_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -134,8 +134,8 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f32(ptr addrspace(1) %out, ptr a
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @test_fmin_legacy_olt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_olt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -158,8 +158,8 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f32(ptr addrspace(1) %out, ptr a
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @test_fmin_legacy_ult_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ult_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -182,8 +182,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_f32(ptr addrspace(1) %out, ptr a
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1
@@ -209,8 +209,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(ptr addrspace(1) %out, ptr
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
-define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr <2 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <2 x float>, ptr addrspace(1) %gep.0, i32 1
@@ -242,8 +242,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(ptr addrspace(1) %out, ptr
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN: v_min_f32_e32
; GCN-NONAN-NOT: v_min_
-define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1
@@ -264,8 +264,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(ptr addrspace(1) %out, ptr
; GCN-NEXT: v_cndmask_b32
; GCN-NOT: v_min
; GCN: s_endpgm
-define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -278,6 +278,3 @@ define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(ptr addrspace(1) %
store i1 %cmp, ptr addrspace(1) %out1
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll b/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
index 077b76b8cb281d..3c8d41db6df03f 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
@@ -2,11 +2,11 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX678 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-declare double @llvm.minnum.f64(double, double) #0
-declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
-declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
-declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
-declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) nounwind readnone
+declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) nounwind readnone
+declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) nounwind readnone
; GCN-LABEL: {{^}}test_fmin_f64_ieee_noflush:
; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
@@ -16,8 +16,8 @@ declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
; GCN-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETB]], [[QUIETA]]
-define amdgpu_kernel void @test_fmin_f64_ieee_noflush([8 x i32], double %a, [8 x i32], double %b) #1 {
- %val = call double @llvm.minnum.f64(double %a, double %b) #0
+define amdgpu_kernel void @test_fmin_f64_ieee_noflush([8 x i32], double %a, [8 x i32], double %b) nounwind "denormal-fp-math"="ieee,ieee" {
+ %val = call double @llvm.minnum.f64(double %a, double %b) nounwind readnone
store double %val, ptr addrspace(1) undef, align 8
ret void
}
@@ -32,8 +32,8 @@ define amdgpu_kernel void @test_fmin_f64_ieee_noflush([8 x i32], double %a, [8 x
; GFX9-DAG: v_max_f64 [[QUIETB:v\[[0-9]+:[0-9]+\]]], [[B]], [[B]]
; GCN: v_min_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[QUIETB]], [[QUIETA]]
-define amdgpu_kernel void @test_fmin_f64_ieee_flush([8 x i32], double %a, [8 x i32], double %b) #2 {
- %val = call double @llvm.minnum.f64(double %a, double %b) #0
+define amdgpu_kernel void @test_fmin_f64_ieee_flush([8 x i32], double %a, [8 x i32], double %b) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
+ %val = call double @llvm.minnum.f64(double %a, double %b) nounwind readnone
store double %val, ptr addrspace(1) undef, align 8
ret void
}
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_fmin_f64_ieee_flush([8 x i32], double %a, [8 x i
define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind {
%a = load volatile double, ptr addrspace(3) undef
%b = load volatile double, ptr addrspace(3) undef
- %val = call double @llvm.minnum.f64(double %a, double %b) #0
+ %val = call double @llvm.minnum.f64(double %a, double %b) nounwind readnone
store volatile double %val, ptr addrspace(3) undef
ret void
}
@@ -58,7 +58,7 @@ define amdgpu_ps void @test_fmin_f64_no_ieee() nounwind {
; GCN: v_min_f64
; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind {
- %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+ %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) nounwind readnone
store <2 x double> %val, ptr addrspace(1) %out, align 16
ret void
}
@@ -69,7 +69,7 @@ define amdgpu_kernel void @test_fmin_v2f64(ptr addrspace(1) %out, <2 x double> %
; GCN: v_min_f64
; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind {
- %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+ %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) nounwind readnone
store <4 x double> %val, ptr addrspace(1) %out, align 32
ret void
}
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_fmin_v4f64(ptr addrspace(1) %out, <4 x double> %
; GCN: v_min_f64
; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind {
- %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+ %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) nounwind readnone
store <8 x double> %val, ptr addrspace(1) %out, align 64
ret void
}
@@ -107,11 +107,7 @@ define amdgpu_kernel void @test_fmin_v8f64(ptr addrspace(1) %out, <8 x double> %
; GCN: v_min_f64
; GCN: v_min_f64
define amdgpu_kernel void @test_fmin_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind {
- %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+ %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) nounwind readnone
store <16 x double> %val, ptr addrspace(1) %out, align 128
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index 65b311845a6b77..bd1e9c56f26890 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -7,8 +7,8 @@
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
; GCN-NOT: [[RESULT]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_fmin_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) #0 {
- %val = call float @llvm.minnum.f32(float %a, float %b) #1
+define amdgpu_kernel void @test_fmin_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -17,8 +17,8 @@ define amdgpu_kernel void @test_fmin_f32_ieee_mode_on(ptr addrspace(1) %out, flo
; GCN: s_waitcnt
; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64
-define float @test_fmin_nnan_f32_ieee_mode_on(float %a, float %b) #0 {
- %val = call nnan float @llvm.minnum.f32(float %a, float %b) #1
+define float @test_fmin_nnan_f32_ieee_mode_on(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call nnan float @llvm.minnum.f32(float %a, float %b) nounwind readnone
ret float %val
}
@@ -27,23 +27,23 @@ define float @test_fmin_nnan_f32_ieee_mode_on(float %a, float %b) #0 {
; GCN-NOT: v1
; GCN: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
-define amdgpu_ps float @test_fmin_nnan_f32_ieee_mode_off(float %a, float %b) #0 {
- %val = call nnan float @llvm.minnum.f32(float %a, float %b) #1
+define amdgpu_ps float @test_fmin_nnan_f32_ieee_mode_off(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call nnan float @llvm.minnum.f32(float %a, float %b) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_off:
; GCN: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: ; return
-define amdgpu_ps float @test_fmin_f32_ieee_mode_off(float %a, float %b) #0 {
- %val = call float @llvm.minnum.f32(float %a, float %b) #1
+define amdgpu_ps float @test_fmin_f32_ieee_mode_off(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}test_fmin_v2f32:
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
-define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
-define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
@@ -69,7 +69,7 @@ define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
-define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
@@ -92,7 +92,7 @@ define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
; GCN: v_min_f32_e32
-define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float>
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -112,7 +112,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -122,7 +122,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out)
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -132,7 +132,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out)
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -142,7 +142,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out)
; GCN-NOT: v_min_f32_e32
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -152,7 +152,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -162,7 +162,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -172,7 +172,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #
; GCN-NOT: v_min_f32_e32
; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -180,31 +180,31 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}fmin_var_immediate_f32_no_ieee:
; GCN: v_min_f32_e32 v0, 2.0, v0
-define amdgpu_ps float @fmin_var_immediate_f32_no_ieee(float %a) #0 {
- %val = call float @llvm.minnum.f32(float %a, float 2.0) #1
+define amdgpu_ps float @fmin_var_immediate_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float %a, float 2.0) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}fmin_immediate_var_f32_no_ieee:
; GCN: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-define amdgpu_ps float @fmin_immediate_var_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.minnum.f32(float 2.0, float %a) #1
+define amdgpu_ps float @fmin_immediate_var_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float 2.0, float %a) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}fmin_var_literal_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_ps float @fmin_var_literal_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.minnum.f32(float %a, float 99.0) #1
+define amdgpu_ps float @fmin_var_literal_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float %a, float 99.0) nounwind readnone
ret float %val
}
; GCN-LABEL: {{^}}fmin_literal_var_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_ps float @fmin_literal_var_f32_no_ieee(float inreg %a) #0 {
- %val = call float @llvm.minnum.f32(float 99.0, float %a) #1
+define amdgpu_ps float @fmin_literal_var_f32_no_ieee(float inreg %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
+ %val = call float @llvm.minnum.f32(float 99.0, float %a) nounwind readnone
ret float %val
}
@@ -214,16 +214,13 @@ define amdgpu_ps float @fmin_literal_var_f32_no_ieee(float inreg %a) #0 {
; GCN: v_min_f32_e32
; GCN-NOT: v_min_f32
define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) nounwind {
- %val = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) #0
+ %val = call <3 x float> @llvm.minnum.v3f32(<3 x float> %a, <3 x float> %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret <3 x float> %val
}
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) nounwind readnone
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) nounwind readnone
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
index 22ccb718a3d285..732a5cf4078b0a 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -3,7 +3,7 @@
; EG-LABEL: {{^}}test_fmin_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
@@ -25,7 +25,7 @@ define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
@@ -42,7 +42,7 @@ define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
@@ -69,7 +69,7 @@ define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
@@ -79,7 +79,7 @@ define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float>
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -90,7 +90,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -100,7 +100,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -110,7 +110,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -120,7 +120,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out)
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -130,7 +130,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -140,7 +140,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -150,7 +150,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -159,7 +159,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #
; EG-LABEL: {{^}}fmin_var_immediate_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -168,7 +168,7 @@ define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %
; EG-LABEL: {{^}}fmin_immediate_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -177,7 +177,7 @@ define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %
; EG-LABEL: {{^}}fmin_var_literal_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0)
store float %val, ptr addrspace(1) %out, align 4
ret void
@@ -186,17 +186,14 @@ define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a)
; EG-LABEL: {{^}}fmin_literal_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a)
store float %val, ptr addrspace(1) %out, align 4
ret void
}
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) nounwind readnone
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 7830c91851bfa7..58632799fdfc3b 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -9,10 +9,10 @@
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
; make add an instruction if the fadd has more than one use.
-declare half @llvm.fabs.f16(half) #1
-declare float @llvm.fabs.f32(float) #1
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
-define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
+define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, float %x, float %y, float %z) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: multiple_fadd_use_test_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -76,7 +76,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, flo
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_fmac_f32(ptr addrspace(1) %out, float %x, [8 x i32], float %y) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmac_f32(ptr addrspace(1) %out, float %x, [8 x i32], float %y) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: multiple_use_fadd_fmac_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -139,7 +139,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f32(ptr addrspace(1) %out, flo
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_fmad_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmad_f32(ptr addrspace(1) %out, float %x, float %y) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: multiple_use_fadd_fmad_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -195,7 +195,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f32(ptr addrspace(1) %out, flo
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
+define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(ptr addrspace(1) %out, float %x, float %y, float %z) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: multiple_use_fadd_multi_fmad_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
@@ -258,7 +258,7 @@ define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @fmul_x2_xn2_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @fmul_x2_xn2_f32(ptr addrspace(1) %out, float %x, float %y) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: fmul_x2_xn2_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x8
@@ -307,7 +307,7 @@ define amdgpu_kernel void @fmul_x2_xn2_f32(ptr addrspace(1) %out, float %x, floa
ret void
}
-define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, float %y) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: fmul_x2_xn3_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x8
@@ -357,7 +357,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, floa
ret void
}
-define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
+define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) nounwind "unsafe-fp-math"="true" {
; VI-DENORM-LABEL: multiple_fadd_use_test_f16:
; VI-DENORM: ; %bb.0:
; VI-DENORM-NEXT: s_load_dword s2, s[4:5], 0x8
@@ -493,7 +493,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) nounwind "unsafe-fp-math"="true" {
; VI-DENORM-LABEL: multiple_use_fadd_fmac_f16:
; VI-DENORM: ; %bb.0:
; VI-DENORM-NEXT: s_load_dword s6, s[4:5], 0x8
@@ -614,7 +614,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) nounwind "unsafe-fp-math"="true" {
; VI-DENORM-LABEL: multiple_use_fadd_fmad_f16:
; VI-DENORM: ; %bb.0:
; VI-DENORM-NEXT: s_load_dword s6, s[4:5], 0x8
@@ -736,7 +736,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16
ret void
}
-define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) nounwind "unsafe-fp-math"="true" {
; VI-DENORM-LABEL: multiple_use_fadd_multi_fmad_f16:
; VI-DENORM: ; %bb.0:
; VI-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
@@ -870,7 +870,7 @@ define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: fmul_x2_xn2_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x8
@@ -922,7 +922,7 @@ define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x
ret void
}
-define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) nounwind "unsafe-fp-math"="true" {
; VI-LABEL: fmul_x2_xn3_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x8
@@ -974,6 +974,3 @@ define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x
store volatile half %mul, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 711a5fff1a0634..64324e8993296c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -6765,7 +6765,7 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha
; strictfp support
;---------------------------------------------------------------------
-define float @v_constrained_fmul_32_f32(float %x, float %y) #0 {
+define float @v_constrained_fmul_32_f32(float %x, float %y) strictfp {
; GCN-LABEL: v_constrained_fmul_32_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6775,7 +6775,7 @@ define float @v_constrained_fmul_32_f32(float %x, float %y) #0 {
ret float %val
}
-define double @v_constrained_fmul_32_f64(double %x, double %y) #0 {
+define double @v_constrained_fmul_32_f64(double %x, double %y) strictfp {
; GFX9-SDAG-LABEL: v_constrained_fmul_32_f64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6817,7 +6817,7 @@ define double @v_constrained_fmul_32_f64(double %x, double %y) #0 {
ret double %val
}
-define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 {
+define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) strictfp {
; GFX9-SDAG-LABEL: v_constrained_fmul_0x1p64_f64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6859,7 +6859,7 @@ define double @v_constrained_fmul_0x1p64_f64(double %x, double %y) #0 {
ret double %val
}
-define half @v_constrained_fmul_32_f16(half %x, half %y) #0 {
+define half @v_constrained_fmul_32_f16(half %x, half %y) strictfp {
; GCN-LABEL: v_constrained_fmul_32_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7249,5 +7249,3 @@ define double @v_mul_fabs_8_f64(double %x) {
declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/fmul.ll b/llvm/test/CodeGen/AMDGPU/fmul.ll
index cedf7c43ff7cf8..a6a77733e1d941 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul.ll
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fmul_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GCN: v_mul_f32
; GCN-NOT: v_mul_f32
; GCN: s_endpgm
-define amdgpu_kernel void @test_mul_2_k(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @test_mul_2_k(ptr addrspace(1) %out, float %x) nounwind {
%y = fmul float %x, 2.0
%z = fmul float %y, 3.0
store float %z, ptr addrspace(1) %out
@@ -61,7 +61,7 @@ define amdgpu_kernel void @test_mul_2_k(ptr addrspace(1) %out, float %x) #0 {
; GCN-NOT: v_mul_f32
; GCN-NOT: v_mad_f32
; GCN: s_endpgm
-define amdgpu_kernel void @test_mul_2_k_inv(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @test_mul_2_k_inv(ptr addrspace(1) %out, float %x) nounwind {
%y = fmul float %x, 3.0
%z = fmul float %y, 2.0
store float %z, ptr addrspace(1) %out
@@ -75,7 +75,7 @@ define amdgpu_kernel void @test_mul_2_k_inv(ptr addrspace(1) %out, float %x) #0
; GCN: v_mul_f32
; GCN: v_mul_f32
; GCN-NOT: v_mul_f32
-define amdgpu_kernel void @test_mul_twouse(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @test_mul_twouse(ptr addrspace(1) %out, float %x, float %y) nounwind {
%a = fmul float %x, 5.0
%b = fsub float -0.0, %a
%c = fmul float %b, %y
@@ -83,5 +83,3 @@ define amdgpu_kernel void @test_mul_twouse(ptr addrspace(1) %out, float %x, floa
store float %d, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
index 718be90eb75fc3..552272adb1c396 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -15,9 +15,9 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DENORM,GFX11-DENORM-STRICT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DENORM,GFX11-DENORM-CONTRACT %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare half @llvm.fabs.f16(half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; VI-FLUSH-LABEL: fmuladd_f16:
@@ -124,7 +124,7 @@ define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX11-DENORM-NEXT: s_nop 0
; GFX11-DENORM-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-NEXT: s_endpgm
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load half, ptr addrspace(1) %in1
%r1 = load half, ptr addrspace(1) %in2
%r2 = load half, ptr addrspace(1) %in3
@@ -273,7 +273,7 @@ define amdgpu_kernel void @fmul_fadd_f16(ptr addrspace(1) %out, ptr addrspace(1)
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load half, ptr addrspace(1) %in1
%r1 = load half, ptr addrspace(1) %in2
%r2 = load half, ptr addrspace(1) %in3
@@ -388,7 +388,7 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr add
; GFX11-DENORM-NEXT: s_nop 0
; GFX11-DENORM-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-NEXT: s_endpgm
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load half, ptr addrspace(1) %in1
%r1 = load half, ptr addrspace(1) %in2
%r2 = load half, ptr addrspace(1) %in3
@@ -398,7 +398,7 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_2.0_a_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -506,7 +506,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_a_2.0_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -741,7 +741,7 @@ define amdgpu_kernel void @fadd_a_a_b_f16(ptr addrspace(1) %out,
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
@@ -883,7 +883,7 @@ define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out,
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
@@ -898,7 +898,7 @@ define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_neg_2.0_a_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1006,7 +1006,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_neg_2.0_neg_a_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1116,7 +1116,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_2.0_neg_a_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1226,7 +1226,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; VI-FLUSH-LABEL: fmuladd_2.0_a_neg_b_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1336,7 +1336,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: mad_sub_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1488,7 +1488,7 @@ define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out,
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -1505,7 +1505,7 @@ define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out,
ret void
}
-define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: mad_sub_inv_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1657,7 +1657,7 @@ define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %o
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -1674,7 +1674,7 @@ define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %o
ret void
}
-define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: mad_sub_fabs_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1826,7 +1826,7 @@ define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture %
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -1837,14 +1837,14 @@ define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture %
%a = load volatile half, ptr addrspace(1) %gep0, align 2
%b = load volatile half, ptr addrspace(1) %gep1, align 2
%c = load volatile half, ptr addrspace(1) %gep2, align 2
- %c.abs = call half @llvm.fabs.f16(half %c) #0
+ %c.abs = call half @llvm.fabs.f16(half %c) nounwind
%mul = fmul half %a, %b
%sub = fsub half %mul, %c.abs
store half %sub, ptr addrspace(1) %outgep, align 2
ret void
}
-define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: mad_sub_fabs_inv_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1996,7 +1996,7 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocaptu
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -2007,14 +2007,14 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocaptu
%a = load volatile half, ptr addrspace(1) %gep0, align 2
%b = load volatile half, ptr addrspace(1) %gep1, align 2
%c = load volatile half, ptr addrspace(1) %gep2, align 2
- %c.abs = call half @llvm.fabs.f16(half %c) #0
+ %c.abs = call half @llvm.fabs.f16(half %c) nounwind
%mul = fmul half %a, %b
%sub = fsub half %c.abs, %mul
store half %sub, ptr addrspace(1) %outgep, align 2
ret void
}
-define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: neg_neg_mad_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -2166,7 +2166,7 @@ define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %o
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -2185,7 +2185,7 @@ define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %o
ret void
}
-define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
; VI-FLUSH-LABEL: mad_fabs_sub_f16:
; VI-FLUSH: ; %bb.0:
; VI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -2337,7 +2337,7 @@ define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture %
; GFX11-DENORM-CONTRACT-NEXT: s_nop 0
; GFX11-DENORM-CONTRACT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-DENORM-CONTRACT-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -2348,7 +2348,7 @@ define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture %
%a = load volatile half, ptr addrspace(1) %gep0, align 2
%b = load volatile half, ptr addrspace(1) %gep1, align 2
%c = load volatile half, ptr addrspace(1) %gep2, align 2
- %b.abs = call half @llvm.fabs.f16(half %b) #0
+ %b.abs = call half @llvm.fabs.f16(half %b) nounwind
%mul = fmul half %a, %b.abs
%sub = fsub half %mul, %c
store half %sub, ptr addrspace(1) %outgep, align 2
@@ -2636,6 +2636,3 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f16(ptr addrspace(1) %out, ptr addrsp
store half %r3, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index 945973b2772897..899bfa73ed5ce9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -25,10 +25,10 @@
target triple = "amdgcn--"
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare float @llvm.fabs.f32(float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-LABEL: {{^}}fmuladd_f32:
; GCN-FLUSH-MAD: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
@@ -39,7 +39,7 @@ declare float @llvm.fabs.f32(float) #1
; GCN-DENORM-SLOWFMA: v_mul_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
; GCN-DENORM-SLOWFMA: v_add_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load float, ptr addrspace(1) %in1
%r1 = load float, ptr addrspace(1) %in2
%r2 = load float, ptr addrspace(1) %in3
@@ -59,7 +59,7 @@ define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN-DENORM-STRICT: v_mul_f32_e32
; GCN-DENORM-STRICT: v_add_f32_e32
define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load volatile float, ptr addrspace(1) %in1
%r1 = load volatile float, ptr addrspace(1) %in2
%r2 = load volatile float, ptr addrspace(1) %in3
@@ -77,7 +77,7 @@ define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1)
; GCN-DENORM-FASTFMA: v_fma_f32
define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load volatile float, ptr addrspace(1) %in1
%r1 = load volatile float, ptr addrspace(1) %in2
%r2 = load volatile float, ptr addrspace(1) %in3
@@ -103,7 +103,7 @@ define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr add
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -134,7 +134,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrsp
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -169,7 +169,7 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrsp
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -205,7 +205,7 @@ define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out,
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -233,7 +233,7 @@ define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out,
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -265,7 +265,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr ad
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -298,7 +298,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, pt
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -330,7 +330,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr ad
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -362,8 +362,8 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr ad
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -397,8 +397,8 @@ define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out,
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -431,8 +431,8 @@ define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %o
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -443,7 +443,7 @@ define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %
%a = load volatile float, ptr addrspace(1) %gep0, align 4
%b = load volatile float, ptr addrspace(1) %gep1, align 4
%c = load volatile float, ptr addrspace(1) %gep2, align 4
- %c.abs = call float @llvm.fabs.f32(float %c) #0
+ %c.abs = call float @llvm.fabs.f32(float %c) nounwind
%mul = fmul float %a, %b
%sub = fsub float %mul, %c.abs
store float %sub, ptr addrspace(1) %outgep, align 4
@@ -467,8 +467,8 @@ define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -479,7 +479,7 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocaptu
%a = load volatile float, ptr addrspace(1) %gep0, align 4
%b = load volatile float, ptr addrspace(1) %gep1, align 4
%c = load volatile float, ptr addrspace(1) %gep2, align 4
- %c.abs = call float @llvm.fabs.f32(float %c) #0
+ %c.abs = call float @llvm.fabs.f32(float %c) nounwind
%mul = fmul float %a, %b
%sub = fsub float %c.abs, %mul
store float %sub, ptr addrspace(1) %outgep, align 4
@@ -505,8 +505,8 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocaptu
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -541,8 +541,8 @@ define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %o
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -553,7 +553,7 @@ define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %
%a = load volatile float, ptr addrspace(1) %gep0, align 4
%b = load volatile float, ptr addrspace(1) %gep1, align 4
%c = load volatile float, ptr addrspace(1) %gep2, align 4
- %b.abs = call float @llvm.fabs.f32(float %b) #0
+ %b.abs = call float @llvm.fabs.f32(float %b) nounwind
%mul = fmul float %a, %b.abs
%sub = fsub float %mul, %c
store float %sub, ptr addrspace(1) %outgep, align 4
@@ -577,7 +577,7 @@ define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %
; SI-DENORM: buffer_store_dword [[RESULT]]
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -608,7 +608,7 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrsp
; SI: buffer_store_dword [[RESULT]]
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -623,6 +623,3 @@ define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrsp
store float %r3, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
index ff89cc21b56dab..d0e368cc07fe9d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -8,7 +8,7 @@
; GCN-LABEL: {{^}}fmuladd_f64:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
@@ -23,7 +23,7 @@ define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN-STRICT: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GCN-STRICT: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
@@ -37,7 +37,7 @@ define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
%r2 = load double, ptr addrspace(1) %in3
@@ -60,7 +60,7 @@ define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr add
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out,
; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -108,8 +108,8 @@ define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out,
; GCN-STRICT: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; GCN-CONTRACT: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) nounwind readnone {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr double, ptr addrspace(1) %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out,
; GCN-CONTRACT: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -155,7 +155,7 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out,
; GCN-CONTRACT: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -174,7 +174,7 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out,
; GCN: v_fma_f64
define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
@@ -189,8 +189,5 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out,
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fmuladd.f64(double, double, double) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index 0c40fe0215b5dc..7d6fbdbba8c528 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -8,9 +8,9 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) nounwind readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone
; GCN-LABEL: {{^}}fmuladd_v2f16:
; GFX9-FLUSH: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
@@ -18,7 +18,7 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmuladd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
@@ -33,7 +33,7 @@ define amdgpu_kernel void @fmuladd_v2f16(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmul_fadd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fmul_fadd_v2f16(ptr addrspace(1) %out, ptr addrspace(
; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define amdgpu_kernel void @fmul_fadd_contract_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+ ptr addrspace(1) %in2, ptr addrspace(1) %in3) nounwind {
%r0 = load <2 x half>, ptr addrspace(1) %in1
%r1 = load <2 x half>, ptr addrspace(1) %in2
%r2 = load <2 x half>, ptr addrspace(1) %in3
@@ -70,7 +70,7 @@ define amdgpu_kernel void @fmul_fadd_contract_v2f16(ptr addrspace(1) %out, ptr a
; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
@@ -94,7 +94,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(ptr addrspace(1) %out, ptr addr
; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(ptr addrspace(1) %out, ptr addr
; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
define amdgpu_kernel void @fadd_a_a_b_v2f16(ptr addrspace(1) %out,
ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
@@ -136,6 +136,3 @@ define amdgpu_kernel void @fadd_a_a_b_v2f16(ptr addrspace(1) %out,
store <2 x half> %add.1, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
index f411a76e75ab69..343194b7cbe575 100644
--- a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
@@ -4,15 +4,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
-declare half @llvm.nearbyint.f16(half) #0
-declare float @llvm.nearbyint.f32(float) #0
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0
-declare double @llvm.nearbyint.f64(double) #0
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0
+declare half @llvm.nearbyint.f16(half) nounwind readonly
+declare float @llvm.nearbyint.f32(float) nounwind readonly
+declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) nounwind readonly
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readonly
+declare double @llvm.nearbyint.f64(double) nounwind readonly
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) nounwind readonly
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readonly
-define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) #1 {
+define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) nounwind {
; SI-LABEL: fnearbyint_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -67,7 +67,7 @@ define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) #1 {
ret void
}
-define amdgpu_kernel void @fnearbyint_f32(ptr addrspace(1) %out, float %in) #1 {
+define amdgpu_kernel void @fnearbyint_f32(ptr addrspace(1) %out, float %in) nounwind {
; SICI-LABEL: fnearbyint_f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -108,7 +108,7 @@ entry:
ret void
}
-define amdgpu_kernel void @fnearbyint_v2f32(ptr addrspace(1) %out, <2 x float> %in) #1 {
+define amdgpu_kernel void @fnearbyint_v2f32(ptr addrspace(1) %out, <2 x float> %in) nounwind {
; SICI-LABEL: fnearbyint_v2f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -150,7 +150,7 @@ entry:
ret void
}
-define amdgpu_kernel void @fnearbyint_v4f32(ptr addrspace(1) %out, <4 x float> %in) #1 {
+define amdgpu_kernel void @fnearbyint_v4f32(ptr addrspace(1) %out, <4 x float> %in) nounwind {
; SICI-LABEL: fnearbyint_v4f32:
; SICI: ; %bb.0: ; %entry
; SICI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -444,6 +444,3 @@ entry:
store <4 x double> %0, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index b32630a97b3ad0..f87fbab8ff4683 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -12,7 +12,7 @@
; fadd tests
; --------------------------------------------------------------------------------
-define half @v_fneg_add_f16(half %a, half %b) #0 {
+define half @v_fneg_add_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -65,7 +65,7 @@ define half @v_fneg_add_f16(half %a, half %b) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_add_store_use_add_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -98,7 +98,7 @@ define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -162,7 +162,7 @@ define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define half @v_fneg_add_fneg_x_f16(half %a, half %b) #0 {
+define half @v_fneg_add_fneg_x_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_fneg_x_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@ define half @v_fneg_add_fneg_x_f16(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_add_x_fneg_f16(half %a, half %b) #0 {
+define half @v_fneg_add_x_fneg_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_x_fneg_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -270,7 +270,7 @@ define half @v_fneg_add_x_fneg_f16(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) #0 {
+define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -325,7 +325,7 @@ define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -392,7 +392,7 @@ define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -468,7 +468,7 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
}
; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
+define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: fneg_fadd_0_f16:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
@@ -594,7 +594,7 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
-define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 {
+define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) nounwind "unsafe-fp-math"="true" {
; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
@@ -684,7 +684,7 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
; fmul tests
; --------------------------------------------------------------------------------
-define half @v_fneg_mul_f16(half %a, half %b) #0 {
+define half @v_fneg_mul_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -711,7 +711,7 @@ define half @v_fneg_mul_f16(half %a, half %b) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_store_use_mul_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -744,7 +744,7 @@ define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_multi_use_mul_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -778,7 +778,7 @@ define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define half @v_fneg_mul_fneg_x_f16(half %a, half %b) #0 {
+define half @v_fneg_mul_fneg_x_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_fneg_x_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -806,7 +806,7 @@ define half @v_fneg_mul_fneg_x_f16(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_mul_x_fneg_f16(half %a, half %b) #0 {
+define half @v_fneg_mul_x_fneg_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_x_fneg_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -834,7 +834,7 @@ define half @v_fneg_mul_x_fneg_f16(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_mul_fneg_fneg_f16(half %a, half %b) #0 {
+define half @v_fneg_mul_fneg_fneg_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_fneg_fneg_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -863,7 +863,7 @@ define half @v_fneg_mul_fneg_fneg_f16(half %a, half %b) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
+define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -899,7 +899,7 @@ define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -943,7 +943,7 @@ define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c
; fminnum tests
; --------------------------------------------------------------------------------
-define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
+define half @v_fneg_minnum_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -975,7 +975,7 @@ define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_minnum_f16_no_ieee(half %a, half %b) #4 {
+define half @v_fneg_minnum_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1002,7 +1002,7 @@ define half @v_fneg_minnum_f16_no_ieee(half %a, half %b) #4 {
ret half %fneg
}
-define half @v_fneg_self_minnum_f16_ieee(half %a) #0 {
+define half @v_fneg_self_minnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_self_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1025,7 +1025,7 @@ define half @v_fneg_self_minnum_f16_ieee(half %a) #0 {
ret half %min.fneg
}
-define half @v_fneg_self_minnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_self_minnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_self_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1048,7 +1048,7 @@ define half @v_fneg_self_minnum_f16_no_ieee(half %a) #4 {
ret half %min.fneg
}
-define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
+define half @v_fneg_posk_minnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_posk_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1076,7 +1076,7 @@ define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_posk_minnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_posk_minnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1101,7 +1101,7 @@ define half @v_fneg_posk_minnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
+define half @v_fneg_negk_minnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_negk_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1129,7 +1129,7 @@ define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_negk_minnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_negk_minnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1154,7 +1154,7 @@ define half @v_fneg_negk_minnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_0_minnum_f16(half %a) #0 {
+define half @v_fneg_0_minnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_minnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1183,7 +1183,7 @@ define half @v_fneg_0_minnum_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
+define half @v_fneg_neg0_minnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg0_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1211,7 +1211,7 @@ define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
+define half @v_fneg_inv2pi_minnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1241,7 +1241,7 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
+define half @v_fneg_neg_inv2pi_minnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1271,7 +1271,7 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg0_minnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_neg0_minnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1296,7 +1296,7 @@ define half @v_fneg_neg0_minnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
+define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1330,7 +1330,7 @@ define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
ret half %mul
}
-define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
+define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1364,7 +1364,7 @@ define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
ret half %mul
}
-define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
+define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1396,7 +1396,7 @@ define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
ret half %mul
}
-define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b) #0 {
+define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1434,7 +1434,7 @@ define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b)
ret { half, half } %insert.1
}
-define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b) #4 {
+define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1475,7 +1475,7 @@ define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b)
; fmaxnum tests
; --------------------------------------------------------------------------------
-define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
+define half @v_fneg_maxnum_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1507,7 +1507,7 @@ define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
ret half %fneg
}
-define half @v_fneg_maxnum_f16_no_ieee(half %a, half %b) #4 {
+define half @v_fneg_maxnum_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1534,7 +1534,7 @@ define half @v_fneg_maxnum_f16_no_ieee(half %a, half %b) #4 {
ret half %fneg
}
-define half @v_fneg_self_maxnum_f16_ieee(half %a) #0 {
+define half @v_fneg_self_maxnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_self_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1557,7 +1557,7 @@ define half @v_fneg_self_maxnum_f16_ieee(half %a) #0 {
ret half %max.fneg
}
-define half @v_fneg_self_maxnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_self_maxnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_self_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1580,7 +1580,7 @@ define half @v_fneg_self_maxnum_f16_no_ieee(half %a) #4 {
ret half %max.fneg
}
-define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
+define half @v_fneg_posk_maxnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_posk_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1608,7 +1608,7 @@ define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_posk_maxnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_posk_maxnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1633,7 +1633,7 @@ define half @v_fneg_posk_maxnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
+define half @v_fneg_negk_maxnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_negk_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1661,7 +1661,7 @@ define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_negk_maxnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_negk_maxnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1686,7 +1686,7 @@ define half @v_fneg_negk_maxnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_0_maxnum_f16(half %a) #0 {
+define half @v_fneg_0_maxnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_maxnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1715,7 +1715,7 @@ define half @v_fneg_0_maxnum_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
+define half @v_fneg_neg0_maxnum_f16_ieee(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg0_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1743,7 +1743,7 @@ define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg0_maxnum_f16_no_ieee(half %a) #4 {
+define half @v_fneg_neg0_maxnum_f16_no_ieee(half %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1768,7 +1768,7 @@ define half @v_fneg_neg0_maxnum_f16_no_ieee(half %a) #4 {
ret half %fneg
}
-define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
+define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1802,7 +1802,7 @@ define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
ret half %mul
}
-define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
+define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1834,7 +1834,7 @@ define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
ret half %mul
}
-define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b) #0 {
+define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1872,7 +1872,7 @@ define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b)
ret { half, half } %insert.1
}
-define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b) #4 {
+define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1913,7 +1913,7 @@ define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b)
; fma tests
; --------------------------------------------------------------------------------
-define half @v_fneg_fma_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1970,7 +1970,7 @@ define half @v_fneg_fma_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fma_store_use_fma_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2005,7 +2005,7 @@ define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2072,7 +2072,7 @@ define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #
ret { half, half } %insert.1
}
-define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2130,7 +2130,7 @@ define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2188,7 +2188,7 @@ define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2247,7 +2247,7 @@ define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2306,7 +2306,7 @@ define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2364,7 +2364,7 @@ define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2435,7 +2435,7 @@ define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half %c, half %d) #0 {
+define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half %c, half %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2516,7 +2516,7 @@ define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half
; fmad tests
; --------------------------------------------------------------------------------
-define half @v_fneg_fmad_f16(half %a, half %b, half %c) #0 {
+define half @v_fneg_fmad_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fmad_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2573,7 +2573,7 @@ define half @v_fneg_fmad_f16(half %a, half %b, half %c) #0 {
ret half %fneg
}
-define <4 x half> @v_fneg_fmad_v4f32(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
+define <4 x half> @v_fneg_fmad_v4f32(<4 x half> %a, <4 x half> %b, <4 x half> %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fmad_v4f32:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2705,7 +2705,7 @@ define <4 x half> @v_fneg_fmad_v4f32(<4 x half> %a, <4 x half> %b, <4 x half> %c
ret <4 x half> %fneg
}
-define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c) #0 {
+define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2776,7 +2776,7 @@ define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c)
; fp_extend tests
; --------------------------------------------------------------------------------
-define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
+define double @v_fneg_fp_extend_f16_to_f64(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_extend_f16_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2804,7 +2804,7 @@ define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
ret double %fneg
}
-define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
+define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2831,7 +2831,7 @@ define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
ret double %fneg
}
-define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0 {
+define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2866,7 +2866,7 @@ define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0
ret { double, half } %insert.1
}
-define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #0 {
+define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2901,7 +2901,7 @@ define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #
ret { double, double } %insert.1
}
-define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(half %a) #0 {
+define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2937,7 +2937,7 @@ define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(h
ret { double, double } %insert.1
}
-define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
+define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2970,7 +2970,7 @@ define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0
; fp_round tests
; --------------------------------------------------------------------------------
-define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
+define half @v_fneg_fp_round_f64_to_f16(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_f64_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3121,7 +3121,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
ret half %fneg
}
-define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
+define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_fneg_f64_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3268,7 +3268,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
ret half %fneg
}
-define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0 {
+define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3427,7 +3427,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
ret { half, double } %insert.1
}
-define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, double %c) #0 {
+define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, double %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3585,7 +3585,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
ret { half, double } %insert.1
}
-define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
+define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3741,7 +3741,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
; ftrunc tests
; --------------------------------------------------------------------------------
-define half @v_fneg_trunc_f16(half %a) #0 {
+define half @v_fneg_trunc_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_trunc_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3770,7 +3770,7 @@ define half @v_fneg_trunc_f16(half %a) #0 {
; fround tests
; --------------------------------------------------------------------------------
-define half @v_fneg_round_f16(half %a) #0 {
+define half @v_fneg_round_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_round_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3864,7 +3864,7 @@ define half @v_fneg_round_f16(half %a) #0 {
; rint tests
; --------------------------------------------------------------------------------
-define half @v_fneg_rint_f16(half %a) #0 {
+define half @v_fneg_rint_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_rint_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3893,7 +3893,7 @@ define half @v_fneg_rint_f16(half %a) #0 {
; nearbyint tests
; --------------------------------------------------------------------------------
-define half @v_fneg_nearbyint_f16(half %a) #0 {
+define half @v_fneg_nearbyint_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_nearbyint_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3922,7 +3922,7 @@ define half @v_fneg_nearbyint_f16(half %a) #0 {
; sin tests
; --------------------------------------------------------------------------------
-define half @v_fneg_sin_f16(half %a) #0 {
+define half @v_fneg_sin_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_sin_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3957,7 +3957,7 @@ define half @v_fneg_sin_f16(half %a) #0 {
; fcanonicalize tests
; --------------------------------------------------------------------------------
-define half @v_fneg_canonicalize_f16(half %a) #0 {
+define half @v_fneg_canonicalize_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_canonicalize_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3985,7 +3985,7 @@ define half @v_fneg_canonicalize_f16(half %a) #0 {
; CopyToReg tests
; --------------------------------------------------------------------------------
-define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) #0 {
+define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_copytoreg_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4082,7 +4082,7 @@ endif:
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
-define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
+define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inlineasm_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4116,7 +4116,7 @@ define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = fmul half %a, %b
%fneg = fneg half %mul
- call void asm sideeffect "; use $0", "v"(half %fneg) #0
+ call void asm sideeffect "; use $0", "v"(half %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret half %fneg
}
@@ -4125,7 +4125,7 @@ define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
-define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) #0 {
+define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inlineasm_multi_use_src_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4165,7 +4165,7 @@ define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a,
%out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
%mul = fmul half %a, %b
%fneg = fneg half %mul
- call void asm sideeffect "; use $0", "v"(half %fneg) #0
+ call void asm sideeffect "; use $0", "v"(half %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret half %mul
}
@@ -4175,7 +4175,7 @@ define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a,
; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
-define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c) #0 {
+define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: multiuse_fneg_2_vop3_users_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4215,7 +4215,7 @@ define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c)
; There are multiple users, but both require using a larger encoding
; for the modifier.
-define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c) #0 {
+define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: multiuse_fneg_2_vop2_users_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4254,7 +4254,7 @@ define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c)
}
; One user is VOP3 so has no cost to folding the modifier, the other does.
-define { half, half } @multiuse_fneg_vop2_vop3_users_f16(ptr addrspace(1) %out, half %a, half %b, half %c) #0 {
+define { half, half } @multiuse_fneg_vop2_vop3_users_f16(ptr addrspace(1) %out, half %a, half %b, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: multiuse_fneg_vop2_vop3_users_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4296,7 +4296,7 @@ define { half, half } @multiuse_fneg_vop2_vop3_users_f16(ptr addrspace(1) %out,
; The use of the fneg requires a code size increase, but folding into
; the source does not
-define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
+define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4378,7 +4378,7 @@ define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %ou
; %trunc.a has one fneg use, but it requires a code size increase and
; %the fneg can instead be folded for free into the fma.
-define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
+define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: one_use_cost_to_fold_into_src_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4416,7 +4416,7 @@ define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, h
ret half %fma0
}
-define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
+define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: multi_use_cost_to_fold_into_src:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4465,7 +4465,7 @@ define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ha
; undone by the generic combine to pull the fneg out of the fma if
; !isFNegFree. We were reporting false for v2f32 even though it will
; be split into f32 where it will be free.
-define <2 x half> @fneg_fma_fneg_dagcombine_loop(<2 x half> %arg, <2 x half> %arg1, <2 x half> %arg2) #0 {
+define <2 x half> @fneg_fma_fneg_dagcombine_loop(<2 x half> %arg, <2 x half> %arg1, <2 x half> %arg2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: fneg_fma_fneg_dagcombine_loop:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4529,7 +4529,7 @@ bb:
; This expects denormal flushing, so can't turn this fmul into fneg
; TODO: Keeping this as fmul saves encoding size
-define half @nnan_fmul_neg1_to_fneg(half %x, half %y) #0 {
+define half @nnan_fmul_neg1_to_fneg(half %x, half %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: nnan_fmul_neg1_to_fneg:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4619,7 +4619,7 @@ define half @denorm_snan_fmul_neg1_to_fneg(half %x, half %y) {
ret half %add
}
-define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) #0 {
+define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: flush_snan_fmul_neg1_to_fneg:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4741,23 +4741,17 @@ define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x ha
ret <2 x half> %add
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.sin.f16(half) #1
-declare half @llvm.trunc.f16(half) #1
-declare half @llvm.round.f16(half) #1
-declare half @llvm.rint.f16(half) #1
-declare half @llvm.nearbyint.f16(half) #1
-declare half @llvm.roundeven.f16(half) #1
-declare half @llvm.canonicalize.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare half @llvm.fma.f16(half, half, half) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare half @llvm.sin.f16(half) nounwind readnone
+declare half @llvm.trunc.f16(half) nounwind readnone
+declare half @llvm.round.f16(half) nounwind readnone
+declare half @llvm.rint.f16(half) nounwind readnone
+declare half @llvm.nearbyint.f16(half) nounwind readnone
+declare half @llvm.roundeven.f16(half) nounwind readnone
+declare half @llvm.canonicalize.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
+declare half @llvm.fma.f16(half, half, half) nounwind readnone
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
-attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
+declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
index d8bbda19453fa6..ea586f8058b469 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
@@ -6,7 +6,7 @@
; rcp tests
; --------------------------------------------------------------------------------
-define half @v_fneg_rcp_f16(half %a) #0 {
+define half @v_fneg_rcp_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17,7 +17,7 @@ define half @v_fneg_rcp_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_rcp_fneg_f16(half %a) #0 {
+define half @v_fneg_rcp_fneg_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +29,7 @@ define half @v_fneg_rcp_fneg_f16(half %a) #0 {
ret half %fneg
}
-define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
+define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -45,7 +45,7 @@ define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
ret { half, half } %insert.1
}
-define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
+define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@ define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
; sin tests
; --------------------------------------------------------------------------------
-define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
+define half @v_fneg_amdgcn_sin_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_amdgcn_sin_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -81,7 +81,7 @@ define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
; vintrp tests
; --------------------------------------------------------------------------------
-define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
+define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_interp_p1_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -100,7 +100,7 @@ define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
+define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_interp_p2_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@ define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
; --------------------------------------------------------------------------------
; FIXME: Legalization/promote is broken
-define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
+define half @v_fneg_arithmetic_fence_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_arithmetic_fence_f16:
; GCN: ; %bb.0:
; GCN-NEXT: ;ARITH_FENCE
@@ -137,7 +137,7 @@ define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
+define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -151,17 +151,11 @@ define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
ret half %fneg
}
-declare half @llvm.amdgcn.rcp.f16(half) #1
-declare half @llvm.amdgcn.sin.f16(half) #1
-declare half @llvm.arithmetic.fence.f16(half) #1
-declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
-declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
-attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+declare half @llvm.amdgcn.rcp.f16(half) nounwind readnone
+declare half @llvm.amdgcn.sin.f16(half) nounwind readnone
+declare half @llvm.arithmetic.fence.f16(half) nounwind readnone
+declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN-NSZ: {{.*}}
; GCN-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 0cb4b8c960bbfd..6f982c22fd84fc 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -17,7 +17,7 @@
; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -40,7 +40,7 @@ define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, p
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -96,7 +96,7 @@ define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, p
; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -120,7 +120,7 @@ define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addr
; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -144,7 +144,7 @@ define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addr
; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -174,7 +174,7 @@ define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr a
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -204,7 +204,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -230,7 +230,7 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out
; GCN-NSZ-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0, v
; GCN-NSZ: v_cmp_ngt_f32
; GCN-NSZ: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
+define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
.entry:
%tmp7 = fdiv float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
@@ -256,7 +256,7 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i
; GCN-NSZ-DAG: v_cndmask_b32_e64 [[E:v[0-9]+]], -[[D]], v{{[0-9]+}},
; GCN-NSZ-DAG: v_cmp_nlt_f32_e32 {{.*}}, 0
; GCN-NSZ-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, [[C]], 0,
-define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
+define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr nounwind "unsafe-fp-math"="true" {
.entry:
%tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
@@ -279,7 +279,7 @@ define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -300,7 +300,7 @@ define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -325,7 +325,7 @@ define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, p
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -346,7 +346,7 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, p
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -366,7 +366,7 @@ define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addr
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -386,7 +386,7 @@ define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addr
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -410,7 +410,7 @@ define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr a
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -433,7 +433,7 @@ define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -461,7 +461,7 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -480,7 +480,7 @@ define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr add
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
ret float %fneg
@@ -491,7 +491,7 @@ define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -507,7 +507,7 @@ define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float %a, float %a)
%min.fneg = fneg float %min
ret float %min.fneg
@@ -518,7 +518,7 @@ define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -534,7 +534,7 @@ define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float 4.0, float %a)
%fneg = fneg float %min
ret float %fneg
@@ -545,7 +545,7 @@ define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -561,7 +561,7 @@ define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float -4.0, float %a)
%fneg = fneg float %min
ret float %fneg
@@ -573,7 +573,7 @@ define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MIN]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -590,7 +590,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrsp
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -613,7 +613,7 @@ define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, pt
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -635,7 +635,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr a
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -659,7 +659,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, p
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -682,7 +682,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr a
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -706,7 +706,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, p
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -730,7 +730,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr a
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -746,7 +746,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, p
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float -0.0, float %a)
%fneg = fneg float %min
ret float %fneg
@@ -759,7 +759,7 @@ define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -788,7 +788,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1
; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -809,7 +809,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fneg float %min
%mul = fmul float %fneg, %b
@@ -827,7 +827,7 @@ define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -849,7 +849,7 @@ define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
-define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fneg float %min
%use1 = fmul float %min, 4.0
@@ -870,7 +870,7 @@ define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -889,7 +889,7 @@ define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr add
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
ret float %fneg
@@ -900,7 +900,7 @@ define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -916,7 +916,7 @@ define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float %a, float %a)
%max.fneg = fneg float %max
ret float %max.fneg
@@ -927,7 +927,7 @@ define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -943,7 +943,7 @@ define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float 4.0, float %a)
%fneg = fneg float %max
ret float %fneg
@@ -954,7 +954,7 @@ define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -970,7 +970,7 @@ define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float -4.0, float %a)
%fneg = fneg float %max
ret float %fneg
@@ -982,7 +982,7 @@ define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -999,7 +999,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrsp
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1015,7 +1015,7 @@ define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, pt
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
+define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float -0.0, float %a)
%fneg = fneg float %max
ret float %fneg
@@ -1028,7 +1028,7 @@ define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1049,7 +1049,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
; GCN-NEXT: ; return
-define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fneg float %max
%mul = fmul float %fneg, %b
@@ -1067,7 +1067,7 @@ define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1089,7 +1089,7 @@ define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
-define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
+define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fneg float %max
%use1 = fmul float %max, 4.0
@@ -1112,7 +1112,7 @@ define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %
; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1138,7 +1138,7 @@ define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1171,7 +1171,7 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, p
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1200,7 +1200,7 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, p
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1228,7 +1228,7 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr ad
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1256,7 +1256,7 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr ad
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1285,7 +1285,7 @@ define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1314,7 +1314,7 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1348,7 +1348,7 @@ define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr ad
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1380,7 +1380,7 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %o
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, float %d) #0 {
+define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, float %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1413,7 +1413,7 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %o
; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1435,7 +1435,7 @@ define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
-define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1467,7 +1467,7 @@ define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspac
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1493,7 +1493,7 @@ define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out,
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1509,7 +1509,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, pt
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1528,7 +1528,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %ou
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
-define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1548,7 +1548,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrsp
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
-define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1568,7 +1568,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrsp
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1584,7 +1584,7 @@ define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(p
; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
-define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1598,7 +1598,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrsp
}
; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
-define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1620,7 +1620,7 @@ define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(p
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1636,7 +1636,7 @@ define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1655,7 +1655,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
-define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1676,7 +1676,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspa
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
-define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, double %c) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, double %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1695,7 +1695,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspa
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1711,7 +1711,7 @@ define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1730,7 +1730,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
-define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1749,7 +1749,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspa
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1769,7 +1769,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspa
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
-define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1792,7 +1792,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspa
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1808,7 +1808,7 @@ define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1827,7 +1827,7 @@ define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1847,7 +1847,7 @@ define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out,
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1871,7 +1871,7 @@ define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out,
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1894,7 +1894,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addr
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1918,7 +1918,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrsp
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1939,7 +1939,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrsp
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1959,7 +1959,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, p
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -1979,7 +1979,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, p
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2002,7 +2002,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2025,7 +2025,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2052,7 +2052,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2068,7 +2068,7 @@ define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2088,7 +2088,7 @@ define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addr
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2115,7 +2115,7 @@ define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace
; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2135,7 +2135,7 @@ define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2155,7 +2155,7 @@ define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2175,7 +2175,7 @@ define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrs
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2197,7 +2197,7 @@ define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr ad
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2220,7 +2220,7 @@ define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrs
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2255,7 +2255,7 @@ define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrs
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
-define amdgpu_kernel void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2291,7 +2291,7 @@ endif:
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2303,7 +2303,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrs
%c = load volatile float, ptr addrspace(1) %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
- call void asm sideeffect "; use $0", "v"(float %fneg) #0
+ call void asm sideeffect "; use $0", "v"(float %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
store volatile float %fneg, ptr addrspace(1) %out.gep
ret void
}
@@ -2320,7 +2320,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrs
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2332,7 +2332,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %
%c = load volatile float, ptr addrspace(1) %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
- call void asm sideeffect "; use $0", "v"(float %fneg) #0
+ call void asm sideeffect "; use $0", "v"(float %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
store volatile float %mul, ptr addrspace(1) %out.gep
ret void
}
@@ -2355,7 +2355,7 @@ define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2389,7 +2389,7 @@ define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out,
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2422,7 +2422,7 @@ define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out,
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2463,7 +2463,7 @@ define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %o
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
+define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2500,7 +2500,7 @@ define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1)
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
+define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2533,7 +2533,7 @@ define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1)
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
-define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
+define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2563,7 +2563,7 @@ define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %o
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
-define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
+define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -2598,7 +2598,7 @@ define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out
; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4
; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5
; GCN: s_setpc_b64
-define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
+define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
bb:
%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
%i4 = fadd fast <2 x float> %i3, %arg
@@ -2611,7 +2611,7 @@ bb:
; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
-define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+define float @nnan_fmul_neg1_to_fneg(float %x, float %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%mul = fmul float %x, -1.0
%add = fmul nnan float %mul, %y
ret float %add
@@ -2645,7 +2645,7 @@ define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e32 [[TMP:v[0-9]+]], 1.0, v0
; GCN-NEXT: v_mul_f32_e64 v0, -[[TMP]], v1
-define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%quiet = call float @llvm.canonicalize.f32(float %x)
%mul = fmul float %quiet, -1.0
%add = fmul float %mul, %y
@@ -2811,31 +2811,26 @@ define amdgpu_kernel void @v_fneg_negk_select_f32(ptr addrspace(1) %out, ptr add
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fma.f32(float, float, float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.fma.f64(double, double, double) #1
-
-declare float @llvm.amdgcn.sin.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
-attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare float @llvm.sin.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.round.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+declare float @llvm.canonicalize.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+
+declare float @llvm.amdgcn.sin.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index b5440b9c38c9f2..1eb73083d89b05 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -9,7 +9,7 @@
; fadd tests
; --------------------------------------------------------------------------------
-define float @v_fneg_add_f32(float %a, float %b) #0 {
+define float @v_fneg_add_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,7 +27,7 @@ define float @v_fneg_add_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_add_store_use_add_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -41,7 +41,7 @@ define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_multi_use_add_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -65,7 +65,7 @@ define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define float @v_fneg_add_fneg_x_f32(float %a, float %b) #0 {
+define float @v_fneg_add_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -84,7 +84,7 @@ define float @v_fneg_add_fneg_x_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_add_x_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_add_x_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -103,7 +103,7 @@ define float @v_fneg_add_x_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -123,7 +123,7 @@ define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -148,7 +148,7 @@ define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) #0
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -176,7 +176,7 @@ define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, flo
}
; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 {
+define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: fneg_fadd_0_f32:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
@@ -288,7 +288,7 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
-define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
+define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) nounwind "unsafe-fp-math"="true" {
; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0
@@ -335,7 +335,7 @@ define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6
ret float %.i198
}
-define double @v_fneg_add_f64(double %a, double %b) #0 {
+define double @v_fneg_add_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_f64:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -353,7 +353,7 @@ define double @v_fneg_add_f64(double %a, double %b) #0 {
ret double %fneg
}
-define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) #0 {
+define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_add_store_use_add_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -368,7 +368,7 @@ define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) #0
ret { double, double } %insert.1
}
-define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) #0 {
+define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f64:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -401,7 +401,7 @@ define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) #0
ret { double, double } %insert.1
}
-define double @v_fneg_add_fneg_x_f64(double %a, double %b) #0 {
+define double @v_fneg_add_fneg_x_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f64:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -420,7 +420,7 @@ define double @v_fneg_add_fneg_x_f64(double %a, double %b) #0 {
ret double %fneg
}
-define double @v_fneg_add_x_fneg_f64(double %a, double %b) #0 {
+define double @v_fneg_add_x_fneg_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f64:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -439,7 +439,7 @@ define double @v_fneg_add_x_fneg_f64(double %a, double %b) #0 {
ret double %fneg
}
-define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) #0 {
+define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f64:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -459,7 +459,7 @@ define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) #0 {
ret double %fneg
}
-define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b) #0 {
+define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -508,7 +508,7 @@ define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b)
ret { double, double } %insert.1
}
-define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b, double %c) #0 {
+define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b, double %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -547,7 +547,7 @@ define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b,
}
; This one asserted with -enable-no-signed-zeros-fp-math
-define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #0 {
+define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-SAFE-LABEL: fneg_fadd_0_f64:
; SI-SAFE: ; %bb.0: ; %.entry
; SI-SAFE-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
@@ -671,7 +671,7 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; function attribute unsafe-fp-math automatically. Combine with the previous test
; when that is done.
-define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 {
+define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) nounwind "unsafe-fp-math"="true" {
; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f64:
; GCN-SAFE: ; %bb.0: ; %.entry
; GCN-SAFE-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[0:1], 0
@@ -746,7 +746,7 @@ define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %t
; fmul tests
; --------------------------------------------------------------------------------
-define float @v_fneg_mul_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -757,7 +757,7 @@ define float @v_fneg_mul_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_store_use_mul_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -771,7 +771,7 @@ define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_multi_use_mul_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -786,7 +786,7 @@ define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define float @v_fneg_mul_fneg_x_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -798,7 +798,7 @@ define float @v_fneg_mul_fneg_x_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_mul_x_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_x_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_x_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -810,7 +810,7 @@ define float @v_fneg_mul_x_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_fneg_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -823,7 +823,7 @@ define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_store_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -839,7 +839,7 @@ define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) #0
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_multi_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -860,7 +860,7 @@ define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, flo
; fminnum tests
; --------------------------------------------------------------------------------
-define float @v_fneg_minnum_f32_ieee(float %a, float %b) #0 {
+define float @v_fneg_minnum_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -873,7 +873,7 @@ define float @v_fneg_minnum_f32_ieee(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #4 {
+define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -884,7 +884,7 @@ define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #4 {
ret float %fneg
}
-define float @v_fneg_self_minnum_f32_ieee(float %a) #0 {
+define float @v_fneg_self_minnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_self_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -895,7 +895,7 @@ define float @v_fneg_self_minnum_f32_ieee(float %a) #0 {
ret float %min.fneg
}
-define float @v_fneg_self_minnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_self_minnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_self_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -906,7 +906,7 @@ define float @v_fneg_self_minnum_f32_no_ieee(float %a) #4 {
ret float %min.fneg
}
-define float @v_fneg_posk_minnum_f32_ieee(float %a) #0 {
+define float @v_fneg_posk_minnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_posk_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -918,7 +918,7 @@ define float @v_fneg_posk_minnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_posk_minnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_posk_minnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_posk_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -929,7 +929,7 @@ define float @v_fneg_posk_minnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_negk_minnum_f32_ieee(float %a) #0 {
+define float @v_fneg_negk_minnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_negk_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -941,7 +941,7 @@ define float @v_fneg_negk_minnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_negk_minnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_negk_minnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_negk_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -952,7 +952,7 @@ define float @v_fneg_negk_minnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_0_minnum_f32(float %a) #0 {
+define float @v_fneg_0_minnum_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_minnum_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -964,7 +964,7 @@ define float @v_fneg_0_minnum_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_neg0_minnum_f32_ieee(float %a) #0 {
+define float @v_fneg_neg0_minnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_neg0_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -976,7 +976,7 @@ define float @v_fneg_neg0_minnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_inv2pi_minnum_f32(float %a) #0 {
+define float @v_fneg_inv2pi_minnum_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -996,7 +996,7 @@ define float @v_fneg_inv2pi_minnum_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_neg_inv2pi_minnum_f32(float %a) #0 {
+define float @v_fneg_neg_inv2pi_minnum_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg_inv2pi_minnum_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1015,7 +1015,7 @@ define float @v_fneg_neg_inv2pi_minnum_f32(float %a) #0 {
ret float %fneg
}
-define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
+define half @v_fneg_inv2pi_minnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1036,7 +1036,7 @@ define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
ret half %fneg
}
-define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
+define half @v_fneg_neg_inv2pi_minnum_f16(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1056,7 +1056,7 @@ define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
ret half %fneg
}
-define double @v_fneg_inv2pi_minnum_f64(double %a) #0 {
+define double @v_fneg_inv2pi_minnum_f64(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1078,7 +1078,7 @@ define double @v_fneg_inv2pi_minnum_f64(double %a) #0 {
ret double %fneg
}
-define double @v_fneg_neg_inv2pi_minnum_f64(double %a) #0 {
+define double @v_fneg_neg_inv2pi_minnum_f64(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_neg_inv2pi_minnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1099,7 +1099,7 @@ define double @v_fneg_neg_inv2pi_minnum_f64(double %a) #0 {
ret double %fneg
}
-define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_neg0_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1110,7 +1110,7 @@ define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) #0 {
+define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1124,7 +1124,7 @@ define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) #0 {
ret float %mul
}
-define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) #0 {
+define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1146,7 +1146,7 @@ define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) #0 {
ret float %mul
}
-define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
+define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1159,7 +1159,7 @@ define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
ret float %mul
}
-define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float %b) #0 {
+define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1176,7 +1176,7 @@ define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float
ret { float, float } %insert.1
}
-define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #4 {
+define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1195,7 +1195,7 @@ define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %
; fmaxnum tests
; --------------------------------------------------------------------------------
-define float @v_fneg_maxnum_f32_ieee(float %a, float %b) #0 {
+define float @v_fneg_maxnum_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1208,7 +1208,7 @@ define float @v_fneg_maxnum_f32_ieee(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #4 {
+define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1219,7 +1219,7 @@ define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #4 {
ret float %fneg
}
-define float @v_fneg_self_maxnum_f32_ieee(float %a) #0 {
+define float @v_fneg_self_maxnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_self_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1230,7 +1230,7 @@ define float @v_fneg_self_maxnum_f32_ieee(float %a) #0 {
ret float %max.fneg
}
-define float @v_fneg_self_maxnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_self_maxnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_self_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1241,7 +1241,7 @@ define float @v_fneg_self_maxnum_f32_no_ieee(float %a) #4 {
ret float %max.fneg
}
-define float @v_fneg_posk_maxnum_f32_ieee(float %a) #0 {
+define float @v_fneg_posk_maxnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_posk_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1253,7 +1253,7 @@ define float @v_fneg_posk_maxnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_posk_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1264,7 +1264,7 @@ define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_negk_maxnum_f32_ieee(float %a) #0 {
+define float @v_fneg_negk_maxnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_negk_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1276,7 +1276,7 @@ define float @v_fneg_negk_maxnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_negk_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1287,7 +1287,7 @@ define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_0_maxnum_f32(float %a) #0 {
+define float @v_fneg_0_maxnum_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_maxnum_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1299,7 +1299,7 @@ define float @v_fneg_0_maxnum_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_neg0_maxnum_f32_ieee(float %a) #0 {
+define float @v_fneg_neg0_maxnum_f32_ieee(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_neg0_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1311,7 +1311,7 @@ define float @v_fneg_neg0_maxnum_f32_ieee(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #4 {
+define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_neg0_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1322,7 +1322,7 @@ define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #4 {
ret float %fneg
}
-define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) #0 {
+define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1336,7 +1336,7 @@ define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) #0 {
ret float %mul
}
-define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
+define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1349,7 +1349,7 @@ define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
ret float %mul
}
-define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float %b) #0 {
+define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1366,7 +1366,7 @@ define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float
ret { float, float } %insert.1
}
-define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #4 {
+define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1385,7 +1385,7 @@ define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %
; fma tests
; --------------------------------------------------------------------------------
-define float @v_fneg_fma_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1403,7 +1403,7 @@ define float @v_fneg_fma_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fma_store_use_fma_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1417,7 +1417,7 @@ define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fma_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1440,7 +1440,7 @@ define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float
ret { float, float } %insert.1
}
-define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_y_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1459,7 +1459,7 @@ define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_x_fneg_y_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1478,7 +1478,7 @@ define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define float @v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1498,7 +1498,7 @@ define float @v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1518,7 +1518,7 @@ define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_x_y_fneg_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1537,7 +1537,7 @@ define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1562,7 +1562,7 @@ define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, f
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, float %c, float %d) #0 {
+define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, float %c, float %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1592,7 +1592,7 @@ define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, f
; fmad tests
; --------------------------------------------------------------------------------
-define float @v_fneg_fmad_f32(float %a, float %b, float %c) #0 {
+define float @v_fneg_fmad_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fmad_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1610,7 +1610,7 @@ define float @v_fneg_fmad_f32(float %a, float %b, float %c) #0 {
ret float %fneg
}
-define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fmad_v4f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1637,7 +1637,7 @@ define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float
ret <4 x float> %fneg
}
-define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1664,7 +1664,7 @@ define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, floa
; fp_extend tests
; --------------------------------------------------------------------------------
-define double @v_fneg_fp_extend_f32_to_f64(float %a) #0 {
+define double @v_fneg_fp_extend_f32_to_f64(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_extend_f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1675,7 +1675,7 @@ define double @v_fneg_fp_extend_f32_to_f64(float %a) #0 {
ret double %fneg
}
-define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) #0 {
+define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1687,7 +1687,7 @@ define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) #0 {
ret double %fneg
}
-define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) #0 {
+define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1703,7 +1703,7 @@ define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) #
ret { double, float } %insert.1
}
-define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
+define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1718,7 +1718,7 @@ define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a)
ret { double, double } %insert.1
}
-define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
+define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1744,7 +1744,7 @@ define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(f
}
; FIXME: Source modifiers not folded for f16->f32
-define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
+define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1765,7 +1765,7 @@ define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
+define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1793,7 +1793,7 @@ define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(hal
; fp_round tests
; --------------------------------------------------------------------------------
-define float @v_fneg_fp_round_f64_to_f32(double %a) #0 {
+define float @v_fneg_fp_round_f64_to_f32(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_round_f64_to_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1804,7 +1804,7 @@ define float @v_fneg_fp_round_f64_to_f32(double %a) #0 {
ret float %fneg
}
-define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) #0 {
+define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_round_fneg_f64_to_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1816,7 +1816,7 @@ define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) #0 {
ret float %fneg
}
-define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) #0 {
+define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1833,7 +1833,7 @@ define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) #
ret { float, double } %insert.1
}
-define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, double %c) #0 {
+define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, double %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1859,7 +1859,7 @@ define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, d
ret { float, double } %insert.1
}
-define half @v_fneg_fp_round_f32_to_f16(float %a) #0 {
+define half @v_fneg_fp_round_f32_to_f16(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_f32_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1877,7 +1877,7 @@ define half @v_fneg_fp_round_f32_to_f16(float %a) #0 {
ret half %fneg
}
-define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) #0 {
+define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_fneg_f32_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1896,7 +1896,7 @@ define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) #0 {
ret half %fneg
}
-define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) #0 {
+define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1910,7 +1910,7 @@ define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) #0
ret { float, float } %insert.1
}
-define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) #0 {
+define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1935,7 +1935,7 @@ define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) #0 {
ret { half, float } %insert.1
}
-define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, float %c) #0 {
+define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1965,7 +1965,7 @@ define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, floa
; rcp tests
; --------------------------------------------------------------------------------
-define float @v_fneg_rcp_f32(float %a) #0 {
+define float @v_fneg_rcp_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1976,7 +1976,7 @@ define float @v_fneg_rcp_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_rcp_fneg_f32(float %a) #0 {
+define float @v_fneg_rcp_fneg_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1988,7 +1988,7 @@ define float @v_fneg_rcp_fneg_f32(float %a) #0 {
ret float %fneg
}
-define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 {
+define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_store_use_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2006,7 +2006,7 @@ define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr
ret void
}
-define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) #0 {
+define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2024,7 +2024,7 @@ define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) #0 {
}
; Test getNegatedExpression works for rcp nodes
-define float @v_negated_rcp_f32(float %arg0, float %arg1) #1 {
+define float @v_negated_rcp_f32(float %arg0, float %arg1) nounwind readnone {
; GCN-LABEL: v_negated_rcp_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2043,7 +2043,7 @@ define float @v_negated_rcp_f32(float %arg0, float %arg1) #1 {
; fmul_legacy tests
; --------------------------------------------------------------------------------
-define float @v_fneg_mul_legacy_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_legacy_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2054,7 +2054,7 @@ define float @v_fneg_mul_legacy_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2068,7 +2068,7 @@ define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, fl
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2083,7 +2083,7 @@ define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, fl
ret { float, float } %insert.1
}
-define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2095,7 +2095,7 @@ define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_x_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2107,7 +2107,7 @@ define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) #0 {
+define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2120,7 +2120,7 @@ define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) #0 {
ret float %fneg
}
-define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2137,7 +2137,7 @@ define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
+define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2158,7 +2158,7 @@ define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float
; sin tests
; --------------------------------------------------------------------------------
-define float @v_fneg_sin_f32(float %a) #0 {
+define float @v_fneg_sin_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_sin_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2171,7 +2171,7 @@ define float @v_fneg_sin_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_amdgcn_sin_f32(float %a) #0 {
+define float @v_fneg_amdgcn_sin_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_amdgcn_sin_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2186,7 +2186,7 @@ define float @v_fneg_amdgcn_sin_f32(float %a) #0 {
; ftrunc tests
; --------------------------------------------------------------------------------
-define float @v_fneg_trunc_f32(float %a) #0 {
+define float @v_fneg_trunc_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_trunc_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2201,7 +2201,7 @@ define float @v_fneg_trunc_f32(float %a) #0 {
; fround tests
; --------------------------------------------------------------------------------
-define float @v_fneg_round_f32(float %a) #0 {
+define float @v_fneg_round_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: v_fneg_round_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2235,7 +2235,7 @@ define float @v_fneg_round_f32(float %a) #0 {
; rint tests
; --------------------------------------------------------------------------------
-define float @v_fneg_rint_f32(float %a) #0 {
+define float @v_fneg_rint_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_rint_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2250,7 +2250,7 @@ define float @v_fneg_rint_f32(float %a) #0 {
; nearbyint tests
; --------------------------------------------------------------------------------
-define float @v_fneg_nearbyint_f32(float %a) #0 {
+define float @v_fneg_nearbyint_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_nearbyint_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2265,7 +2265,7 @@ define float @v_fneg_nearbyint_f32(float %a) #0 {
; fcanonicalize tests
; --------------------------------------------------------------------------------
-define float @v_fneg_canonicalize_f32(float %a) #0 {
+define float @v_fneg_canonicalize_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_canonicalize_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2280,7 +2280,7 @@ define float @v_fneg_canonicalize_f32(float %a) #0 {
; arithmetic.fence tests
; --------------------------------------------------------------------------------
-define float @v_fneg_arithmetic_fence_f32(float %a) #0 {
+define float @v_fneg_arithmetic_fence_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_arithmetic_fence_f32:
; GCN: ; %bb.0:
; GCN-NEXT: ;ARITH_FENCE
@@ -2292,7 +2292,7 @@ define float @v_fneg_arithmetic_fence_f32(float %a) #0 {
ret float %fneg
}
-define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) #0 {
+define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2310,7 +2310,7 @@ define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) #0 {
; vintrp tests
; --------------------------------------------------------------------------------
-define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_interp_p1_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2337,7 +2337,7 @@ define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) #0 {
ret { float, float } %insert.1
}
-define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) #0 {
+define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_interp_p2_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2372,7 +2372,7 @@ define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) #0 {
; CopyToReg tests
; --------------------------------------------------------------------------------
-define void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
+define void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: v_fneg_copytoreg_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2437,7 +2437,7 @@ endif:
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
-define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
+define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_inlineasm_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2448,7 +2448,7 @@ define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %a, %b
%fneg = fneg float %mul
- call void asm sideeffect "; use $0", "v"(float %fneg) #0
+ call void asm sideeffect "; use $0", "v"(float %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %fneg
}
@@ -2457,7 +2457,7 @@ define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
; --------------------------------------------------------------------------------
; Can't fold into use, so should fold into source
-define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
+define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_fneg_inlineasm_multi_use_src_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2469,7 +2469,7 @@ define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a
; GCN-NEXT: s_setpc_b64 s[30:31]
%mul = fmul float %a, %b
%fneg = fneg float %mul
- call void asm sideeffect "; use $0", "v"(float %fneg) #0
+ call void asm sideeffect "; use $0", "v"(float %fneg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
ret float %mul
}
@@ -2479,7 +2479,7 @@ define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a
; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
-define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, float %c) #0 {
+define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: multiuse_fneg_2_vop3_users_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2497,7 +2497,7 @@ define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, floa
; There are multiple users, but both require using a larger encoding
; for the modifier.
-define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, float %c) #0 {
+define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: multiuse_fneg_2_vop2_users_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2514,7 +2514,7 @@ define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, floa
}
; One user is VOP3 so has no cost to folding the modifier, the other does.
-define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, float %c) #0 {
+define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: multiuse_fneg_vop2_vop3_users_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2533,7 +2533,7 @@ define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, f
; The use of the fneg requires a code size increase, but folding into
; the source does not
-define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, float %a, float %b, float %c, float %d) #0 {
+define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, float %a, float %b, float %c, float %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-SAFE-LABEL: free_fold_src_code_size_cost_use_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2559,7 +2559,7 @@ define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %
ret { float, float } %insert.1
}
-define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, double %b, double %c, double %d) #0 {
+define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, double %b, double %c, double %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: free_fold_src_code_size_cost_use_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2579,7 +2579,7 @@ define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, doubl
; %trunc.a has one fneg use, but it requires a code size increase and
; %the fneg can instead be folded for free into the fma.
-define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, float %d) #0 {
+define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, float %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: one_use_cost_to_fold_into_src_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2592,7 +2592,7 @@ define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, fl
ret float %fma0
}
-define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, float %c, float %d) #0 {
+define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, float %c, float %d) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: multi_use_cost_to_fold_into_src:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2613,7 +2613,7 @@ define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, flo
; undone by the generic combine to pull the fneg out of the fma if
; !isFNegFree. We were reporting false for v2f32 even though it will
; be split into f32 where it will be free.
-define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
+define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: fneg_fma_fneg_dagcombine_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2635,7 +2635,7 @@ bb:
; This expects denormal flushing, so can't turn this fmul into fneg
; TODO: Keeping this as fmul saves encoding size
-define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+define float @nnan_fmul_neg1_to_fneg(float %x, float %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: nnan_fmul_neg1_to_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2673,7 +2673,7 @@ define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
ret float %add
}
-define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
+define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: flush_snan_fmul_neg1_to_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3397,49 +3397,43 @@ bb:
ret float %i1
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fma.f32(float, float, float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.roundeven.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.arithmetic.fence.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.fma.f64(double, double, double) #1
-
-declare float @llvm.amdgcn.sin.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-declare half @llvm.fma.f16(half, half, half) #1
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.sin.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.round.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+declare float @llvm.roundeven.f32(float) nounwind readnone
+declare float @llvm.canonicalize.f32(float) nounwind readnone
+declare float @llvm.arithmetic.fence.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+
+declare float @llvm.amdgcn.sin.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+
+declare half @llvm.fma.f16(half, half, half) nounwind readnone
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
-declare half @llvm.sin.f16(half) #1
-declare half @llvm.trunc.f16(half) #1
-declare half @llvm.round.f16(half) #1
-declare half @llvm.rint.f16(half) #1
-declare half @llvm.nearbyint.f16(half) #1
-declare half @llvm.canonicalize.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare half @llvm.amdgcn.sin.f16(half) #1
-declare half @llvm.amdgcn.rcp.f16(half) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
-attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone
+declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) nounwind readnone
+declare half @llvm.sin.f16(half) nounwind readnone
+declare half @llvm.trunc.f16(half) nounwind readnone
+declare half @llvm.round.f16(half) nounwind readnone
+declare half @llvm.rint.f16(half) nounwind readnone
+declare half @llvm.nearbyint.f16(half) nounwind readnone
+declare half @llvm.canonicalize.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
+declare half @llvm.amdgcn.sin.f16(half) nounwind readnone
+declare half @llvm.amdgcn.rcp.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
index 0ad61066b4b213..c1fc988dfd1ffc 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
@@ -9,7 +9,7 @@
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: {{buffer|flat}}_store_dword [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -21,8 +21,5 @@ define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addr
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 4364b32e62f8c9..b5b4f30f2aa02b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -464,7 +464,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in
ret void
}
-define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(ptr addrspace(1) %out, <2 x half> %in) nounwind {
; CI-LABEL: fold_user_fneg_fabs_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x2
@@ -689,9 +689,6 @@ define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(ptr addrspac
ret void
}
-declare half @llvm.fabs.f16(half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index cd1ec85eb8d0f3..945b49e6037a89 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1598,6 +1598,4 @@ bb5: ; preds = %bb, %.entry
}
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0
-
-attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
+declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) nocallback nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
index 31c1389c940208..9617b8360d786c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -5,7 +5,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1100 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; FIXME: Should be able to do scalar op
-define amdgpu_kernel void @s_fneg_f16(ptr addrspace(1) %out, half %in) #0 {
+define amdgpu_kernel void @s_fneg_f16(ptr addrspace(1) %out, half %in) nounwind {
; CI-LABEL: s_fneg_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -61,7 +61,7 @@ define amdgpu_kernel void @s_fneg_f16(ptr addrspace(1) %out, half %in) #0 {
; FIXME: Should be able to use bit operations when illegal type as
; well.
-define amdgpu_kernel void @v_fneg_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fneg_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fneg_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
@@ -122,7 +122,7 @@ define amdgpu_kernel void @v_fneg_f16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @s_fneg_free_f16(ptr addrspace(1) %out, i16 %in) #0 {
+define amdgpu_kernel void @s_fneg_free_f16(ptr addrspace(1) %out, i16 %in) nounwind {
; CI-LABEL: s_fneg_free_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -177,7 +177,7 @@ define amdgpu_kernel void @s_fneg_free_f16(ptr addrspace(1) %out, i16 %in) #0 {
ret void
}
-define amdgpu_kernel void @v_fneg_fold_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fneg_fold_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fneg_fold_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -239,7 +239,7 @@ define amdgpu_kernel void @v_fneg_fold_f16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @s_fneg_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @s_fneg_v2f16(ptr addrspace(1) %out, <2 x half> %in) nounwind {
; CI-LABEL: s_fneg_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -293,7 +293,7 @@ define amdgpu_kernel void @s_fneg_v2f16(ptr addrspace(1) %out, <2 x half> %in) #
ret void
}
-define amdgpu_kernel void @s_fneg_v2f16_nonload(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @s_fneg_v2f16_nonload(ptr addrspace(1) %out) nounwind {
; CIVI-LABEL: s_fneg_v2f16_nonload:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -342,7 +342,7 @@ define amdgpu_kernel void @s_fneg_v2f16_nonload(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @v_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fneg_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2
@@ -403,7 +403,7 @@ define amdgpu_kernel void @v_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @fneg_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @fneg_free_v2f16(ptr addrspace(1) %out, i32 %in) nounwind {
; CI-LABEL: fneg_free_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -458,7 +458,7 @@ define amdgpu_kernel void @fneg_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
ret void
}
-define amdgpu_kernel void @v_fneg_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fneg_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_fneg_fold_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -531,7 +531,7 @@ define amdgpu_kernel void @v_fneg_fold_v2f16(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @v_extract_fneg_fold_v2f16(ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_extract_fneg_fold_v2f16(ptr addrspace(1) %in) nounwind {
; CI-LABEL: v_extract_fneg_fold_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -616,7 +616,7 @@ define amdgpu_kernel void @v_extract_fneg_fold_v2f16(ptr addrspace(1) %in) #0 {
ret void
}
-define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: v_extract_fneg_no_fold_v2f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -671,7 +671,4 @@ define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(ptr addrspace(1) %in) #0
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll b/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
index 8401e04d27233e..3b0cc03f74f3e7 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
@@ -32,6 +32,4 @@ define amdgpu_kernel void @fold_mul_abs(ptr addrspace(1) %arg) {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
index dd61d9cf4bb2db..4769dfb8c71463 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir
@@ -1,6 +1,6 @@
# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
--- |
- define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
+ define amdgpu_kernel void @add_f32_1.0_one_f16_use() nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -11,7 +11,7 @@
ret void
}
- define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
+ define amdgpu_kernel void @add_f32_1.0_multi_f16_use() nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -22,7 +22,7 @@
ret void
}
- define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
+ define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -33,7 +33,7 @@
ret void
}
- define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
+ define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -46,7 +46,7 @@
ret void
}
- define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
+ define amdgpu_kernel void @add_i32_1_multi_f16_use() nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f16.add0 = fadd half %f16.val0, 0xH0001
@@ -56,7 +56,7 @@
ret void
}
- define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
+ define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -69,7 +69,7 @@
ret void
}
- define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
+ define amdgpu_kernel void @add_f16_1.0_multi_f32_use() nounwind {
%f32.val0 = load volatile float, ptr addrspace(1) undef
%f32.val1 = load volatile float, ptr addrspace(1) undef
%f32.val = load volatile float, ptr addrspace(1) undef
@@ -80,7 +80,7 @@
ret void
}
- define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
+ define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile half, ptr addrspace(1) undef
@@ -91,7 +91,7 @@
ret void
}
- define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
+ define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() nounwind {
%f16.val0 = load volatile half, ptr addrspace(1) undef
%f16.val1 = load volatile half, ptr addrspace(1) undef
%f32.val = load volatile half, ptr addrspace(1) undef
@@ -102,8 +102,6 @@
ret void
}
- attributes #0 = { nounwind }
-
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
index d0d8542d1a1b24..6a1d17fcb524c7 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-multiple.mir
@@ -1,11 +1,9 @@
# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
--- |
- define amdgpu_kernel void @test() #0 {
+ define amdgpu_kernel void @test() nounwind {
ret void
}
- attributes #0 = { nounwind }
-
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
index 50a27d42322d7a..4d9d2a02fea89e 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) "denormal-fp-math-f32"="ieee,ieee" {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -208,5 +208,3 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
%ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
ret <2 x i16> %ret
}
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 18d2e52e8f9002..e3e114105bc89b 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-declare float @llvm.fabs.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
-define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isinf_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -47,14 +47,14 @@ define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, f
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%cmp = fcmp oeq float %fabs, 0x7FF0000000000000
%ext = zext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_not_isinf_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -95,14 +95,14 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%cmp = fcmp ueq float %fabs, 0x7FF0000000000000
%ext = zext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_not_isinf_pattern_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -132,14 +132,14 @@ define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%cmp = fcmp oeq float %fabs, 0xFFF0000000000000
%ext = zext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -181,7 +181,7 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp une float %x.fabs, 0x7FF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -189,7 +189,7 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_pattern_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -238,7 +238,7 @@ define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %o
}
; Use negative infinity
-define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_not_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -278,7 +278,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp une float %x.fabs, 0xFFF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -287,7 +287,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocaptur
}
; No fabs
-define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_not_pattern_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -343,7 +343,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocaptur
}
; fabs of different value
-define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocapture %out, float %x, float %y) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocapture %out, float %x, float %y) nounwind {
; SI-LABEL: test_isfinite_not_pattern_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -389,7 +389,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %y) nounwind readnone
%ninf = fcmp une float %x.fabs, 0x7FF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -398,7 +398,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
}
; Wrong ordered compare type
-define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_not_pattern_3:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -446,7 +446,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp uno float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp une float %x.fabs, 0x7FF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -454,7 +454,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_pattern_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -496,7 +496,7 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp one float %x.fabs, 0x7FF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -504,7 +504,7 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) nocapture %out, float %x) nounwind {
; SI-LABEL: test_isfinite_pattern_4_commute_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -546,7 +546,7 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, 0.000000e+00
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp one float %x.fabs, 0x7FF0000000000000
%and = and i1 %ninf, %ord
%ext = zext i1 %and to i32
@@ -554,7 +554,7 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrspace(1) nocapture %out, float %x, [8 x i32], float %y) #0 {
+define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrspace(1) nocapture %out, float %x, [8 x i32], float %y) nounwind {
; SI-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0x14
@@ -607,7 +607,7 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord float %x, %y
- %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+ %x.fabs = tail call float @llvm.fabs.f32(float %x) nounwind readnone
%ninf = fcmp one float %x.fabs, 0x7FF0000000000000
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -615,7 +615,7 @@ define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrsp
ret void
}
-define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
+define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) nounwind {
; SI-LABEL: test_isinf_pattern_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -657,14 +657,14 @@ define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %ou
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %fabs = tail call half @llvm.fabs.f16(half %x) #1
+ %fabs = tail call half @llvm.fabs.f16(half %x) nounwind readnone
%cmp = fcmp oeq half %fabs, 0xH7C00
%ext = zext i1 %cmp to i32
store i32 %ext, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) nounwind {
; SI-LABEL: test_isfinite_pattern_0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -710,7 +710,7 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord half %x, 0.0
- %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+ %x.fabs = tail call half @llvm.fabs.f16(half %x) nounwind readnone
%ninf = fcmp une half %x.fabs, 0xH7C00
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -718,7 +718,7 @@ define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocaptur
ret void
}
-define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) nounwind {
; SI-LABEL: test_isfinite_pattern_4_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -764,7 +764,7 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ord = fcmp ord half %x, 0.0
- %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+ %x.fabs = tail call half @llvm.fabs.f16(half %x) nounwind readnone
%ninf = fcmp one half %x.fabs, 0xH7C00
%and = and i1 %ord, %ninf
%ext = zext i1 %and to i32
@@ -772,7 +772,4 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
ret void
}
-declare half @llvm.fabs.f16(half) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 767d347bcfaa15..aa9801cd650050 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1178,7 +1178,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1220,7 +1220,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1249,7 +1249,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1291,7 +1291,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1339,7 +1339,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1379,7 +1379,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1404,7 +1404,7 @@ main_body:
ret double %ret
}
-define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 {
+define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1522,7 +1522,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -1564,7 +1564,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1593,7 +1593,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -1636,7 +1636,7 @@ main_body:
ret void
}
-define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1676,7 +1676,7 @@ main_body:
ret double %ret
}
-define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1701,7 +1701,7 @@ main_body:
ret double %ret
}
-define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
+define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2017,7 +2017,7 @@ main_body:
ret double %ret
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -2043,7 +2043,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -2069,7 +2069,7 @@ main_body:
ret void
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) "denormal-fp-math"="preserve-sign,preserve-sign" {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x24
@@ -2118,7 +2118,7 @@ main_body:
ret void
}
-define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 {
+define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2140,7 +2140,7 @@ main_body:
ret double %ret
}
-define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 {
+define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2163,7 +2163,7 @@ main_body:
ret double %ret
}
-define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 {
+define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) "denormal-fp-math"="ieee,ieee" {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2185,9 +2185,3 @@ main_body:
%ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
ret double %ret
}
-
-attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #3 = { "denormal-fp-math"="ieee,ieee" }
-attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
index 5f76c5400e5ab8..ce6b35c0157dfe 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fabs.f64(double) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
; FUNC-LABEL: @fp_to_sint_f64_i32
; SI: v_cvt_i32_f64_e32
@@ -57,7 +57,7 @@ define amdgpu_kernel void @fp_to_sint_i64_f64(ptr addrspace(1) %out, ptr addrspa
; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %in) nounwind {
%conv = fptosi double %in to i1
store i1 %conv, ptr addrspace(1) %out
ret void
@@ -65,12 +65,9 @@ define amdgpu_kernel void @fp_to_sint_f64_to_i1(ptr addrspace(1) %out, double %i
; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_sint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) nounwind {
%in.fabs = call double @llvm.fabs.f64(double %in)
%conv = fptosi double %in.fabs to i1
store i1 %conv, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
index c3b4e6f964db1a..aadb3317cb8979 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fabs.f64(double) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
; SI-LABEL: {{^}}fp_to_uint_i32_f64:
; SI: v_cvt_u32_f64_e32
@@ -71,7 +71,7 @@ define amdgpu_kernel void @fp_to_uint_v4i64_v4f64(ptr addrspace(1) %out, <4 x do
; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %in) nounwind {
%conv = fptoui double %in to i1
store i1 %conv, ptr addrspace(1) %out
ret void
@@ -79,12 +79,9 @@ define amdgpu_kernel void @fp_to_uint_f64_to_i1(ptr addrspace(1) %out, double %i
; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1:
; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) #0 {
+define amdgpu_kernel void @fp_to_uint_fabs_f64_to_i1(ptr addrspace(1) %out, double %in) nounwind {
%in.fabs = call double @llvm.fabs.f64(double %in)
%conv = fptoui double %in.fabs to i1
store i1 %conv, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
index 3752100b7dc0f8..d5e0fb126235d7 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -7,7 +7,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX89 %s
; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
-define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
+define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,7 +38,7 @@ entry:
}
; f16->f64 is not free.
-define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
+define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@ entry:
}
; f32->f64 is not free.
-define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
+define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fmul_f32_to_f64:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -91,7 +91,7 @@ entry:
}
; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
-define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
+define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -123,7 +123,7 @@ entry:
; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
; -> (fma x, y, (fma (fpext u), (fpext v), z))
-define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
+define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -159,7 +159,7 @@ entry:
; fold (fadd x, (fma y, z, (fpext (fmul u, v)))
; -> (fma y, z, (fma (fpext u), (fpext v), x))
-define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
+define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,7 +193,7 @@ entry:
ret float %add
}
-define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
+define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fmad_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -230,7 +230,7 @@ entry:
; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
; -> (fma x, y, (fma (fpext u), (fpext v), z))
-define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
+define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,7 +264,7 @@ entry:
ret float %add
}
-define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
+define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -300,7 +300,7 @@ entry:
; fold (fadd x, (fpext (fma y, z, (fmul u, v)))
; -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
-define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
+define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fmuladd_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -335,7 +335,7 @@ entry:
ret float %add
}
-define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
+define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -370,7 +370,7 @@ entry:
ret float %add
}
-define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
+define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -407,7 +407,7 @@ entry:
; fold (fsub (fpext (fmul x, y)), z)
; -> (fma (fpext x), (fpext y), (fneg z))
-define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
+define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fsub_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -439,7 +439,7 @@ entry:
; fold (fsub x, (fpext (fmul y, z)))
; -> (fma (fneg (fpext y)), (fpext z), x)
-define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
+define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) nounwind readnone speculatable {
; GFX11-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX11-F32FLUSH: ; %bb.0: ; %entry
; GFX11-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -477,7 +477,7 @@ entry:
; fold (fsub (fpext (fneg (fmul, x, y))), z)
; -> (fneg (fma (fpext x), (fpext y), z))
-define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
+define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -510,7 +510,7 @@ entry:
; fold (fsub (fneg (fpext (fmul, x, y))), z)
; -> (fneg (fma (fpext x)), (fpext y), z)
-define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
+define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) nounwind readnone speculatable {
; GFX11-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -543,7 +543,7 @@ entry:
; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
; -> (fmad x, y (fmad (fpext u), (fpext v), (fneg z)))
-define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
+define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -580,7 +580,7 @@ entry:
; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
; -> (fmad (fpext x), (fpext y),
; (fmad (fpext u), (fpext v), (fneg z)))
-define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
+define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -610,7 +610,7 @@ entry:
; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
; -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
-define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
+define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -646,7 +646,7 @@ entry:
; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
; -> (fma (fneg (fpext y)), (fpext z),
; (fma (fneg (fpext u)), (fpext v), x))
-define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
+define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) nounwind readnone speculatable {
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -674,9 +674,7 @@ entry:
ret float %add
}
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-declare half @llvm.fma.f16(half, half, half) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+declare float @llvm.fma.f32(float, float, float) nounwind readnone speculatable
+declare half @llvm.fmuladd.f16(half, half, half) nounwind readnone speculatable
+declare half @llvm.fma.f16(half, half, half) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
index 82c25c01b17792..e3262c045b290a 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
@@ -1028,6 +1028,4 @@ entry:
ret void
}
-declare half @llvm.fabs.f16(half) #1
-
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 6cc7368eeae616..2f82864fb92406 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -1003,7 +1003,7 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) nounwind {
entry:
%a.val = load float, ptr addrspace(1) %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
@@ -1142,7 +1142,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) nounwind {
entry:
%a.val = load float, ptr addrspace(1) %a
%r.val = fptrunc float %a.val to half
@@ -1281,7 +1281,7 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) nounwind {
entry:
%a.val = load float, ptr addrspace(1) %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
@@ -1427,7 +1427,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) nounwind {
entry:
%a.val = load float, ptr addrspace(1) %a
%r.val = fptrunc float %a.val to half
@@ -1437,7 +1437,4 @@ entry:
ret void
}
-declare float @llvm.fabs.f32(float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.fabs.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index e361aa4db2aa94..ea30a2a6a4e097 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -2325,25 +2325,23 @@ entry:
ret <2 x double> %cond6
}
-declare half @llvm.floor.f16(half) #0
-declare float @llvm.floor.f32(float) #0
-declare double @llvm.floor.f64(double) #0
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
-declare <2 x half> @llvm.floor.v2f16(<2 x half>) #0
-declare float @llvm.trunc.f32(float) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare half @llvm.minnum.f16(half, half) #0
-declare double @llvm.minnum.f64(double, double) #0
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
-declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
-declare half @llvm.fabs.f16(half) #0
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare half @llvm.floor.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.floor.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.floor.f64(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.floor.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.trunc.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.minnum.f32(float, float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.minnum.f16(half, half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.minnum.f64(double, double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.maxnum.f32(float, float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.fabs.f64(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
index 1fae9970fc1095..9ea0fa669845f4 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
@@ -5,8 +5,8 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.floor.f64(double) #0
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.floor.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}fract_f64:
; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
@@ -24,7 +24,7 @@ declare double @llvm.floor.f64(double) #0
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[FLOORX]]
; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load double, ptr addrspace(1) %src
%floor.x = call double @llvm.floor.f64(double %x)
%fract = fsub double %x, %floor.x
@@ -48,7 +48,7 @@ define amdgpu_kernel void @fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %sr
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]], -[[FLOORX]]
; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f64_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load double, ptr addrspace(1) %src
%neg.x = fneg double %x
%floor.neg.x = call double @llvm.floor.f64(double %neg.x)
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fract_f64_neg(ptr addrspace(1) %out, ptr addrspace(1)
; CI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|, -[[FLOORX]]
; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f64_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load double, ptr addrspace(1) %src
%abs.x = call double @llvm.fabs.f64(double %x)
%neg.abs.x = fneg double %abs.x
@@ -84,7 +84,7 @@ define amdgpu_kernel void @fract_f64_neg_abs(ptr addrspace(1) %out, ptr addrspac
}
; FUNC-LABEL: {{^}}multi_use_floor_fract_f64:
-define amdgpu_kernel void @multi_use_floor_fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @multi_use_floor_fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load double, ptr addrspace(1) %src
%floor.x = call double @llvm.floor.f64(double %x)
%fract = fsub double %x, %floor.x
@@ -92,6 +92,3 @@ define amdgpu_kernel void @multi_use_floor_fract_f64(ptr addrspace(1) %out, ptr
store volatile double %fract, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fract.ll b/llvm/test/CodeGen/AMDGPU/fract.ll
index bc6ec96c102e48..368ccf67629709 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.ll
@@ -4,15 +4,15 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.floor.f32(float) #0
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
; GCN-LABEL: {{^}}fract_f32:
; GCN: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
; GCN: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[INPUT]], [[FLR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load float, ptr addrspace(1) %src
%floor.x = call float @llvm.floor.f32(float %x)
%fract = fsub float %x, %floor.x
@@ -24,7 +24,7 @@ define amdgpu_kernel void @fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %sr
; GCN: v_floor_f32_e64 [[FLR:v[0-9]+]], -[[INPUT:v[0-9]+]]
; GCN: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT]], [[FLR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f32_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load float, ptr addrspace(1) %src
%x.neg = fsub float -0.0, %x
%floor.x.neg = call float @llvm.floor.f32(float %x.neg)
@@ -37,7 +37,7 @@ define amdgpu_kernel void @fract_f32_neg(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: v_floor_f32_e64 [[FLR:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
; GCN: v_sub_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT]]|, [[FLR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @fract_f32_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load float, ptr addrspace(1) %src
%abs.x = call float @llvm.fabs.f32(float %x)
%neg.abs.x = fsub float -0.0, %abs.x
@@ -53,7 +53,7 @@ define amdgpu_kernel void @fract_f32_neg_abs(ptr addrspace(1) %out, ptr addrspac
; GCN: buffer_store_dword [[FLOOR]]
; GCN: buffer_store_dword [[FRACT]]
-define amdgpu_kernel void @multi_use_floor_fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @multi_use_floor_fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%x = load float, ptr addrspace(1) %src
%floor.x = call float @llvm.floor.f32(float %x)
%fract = fsub float %x, %floor.x
@@ -61,6 +61,3 @@ define amdgpu_kernel void @multi_use_floor_fract_f32(ptr addrspace(1) %out, ptr
store volatile float %fract, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index eeddc2211ea97a..2c2c867954cac2 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -19,7 +19,7 @@
; MUBUF-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @func_mov_fi_i32() #0 {
+define void @func_mov_fi_i32() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca, ptr addrspace(3) undef
ret void
@@ -44,7 +44,7 @@ define void @func_mov_fi_i32() #0 {
; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, [[ADD]]
; GFX9-NEXT: ds_write_b32 v0, v0
-define void @func_mov_fi_i32_offset() #0 {
+define void @func_mov_fi_i32_offset() nounwind {
%alloca0 = alloca i32, addrspace(5)
%alloca1 = alloca i32, addrspace(5)
store volatile ptr addrspace(5) %alloca0, ptr addrspace(3) undef
@@ -69,7 +69,7 @@ define void @func_mov_fi_i32_offset() #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @func_add_constant_to_fi_i32() #0 {
+define void @func_add_constant_to_fi_i32() nounwind {
%alloca = alloca [2 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds [2 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
store volatile ptr addrspace(5) %gep0, ptr addrspace(3) undef
@@ -91,7 +91,7 @@ define void @func_add_constant_to_fi_i32() #0 {
; GCN-NEXT: v_mul_lo_u32 v0, v0, 9
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @func_other_fi_user_i32() #0 {
+define void @func_other_fi_user_i32() nounwind {
%alloca = alloca [2 x i32], align 4, addrspace(5)
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
%mul = mul i32 %ptrtoint, 9
@@ -103,7 +103,7 @@ define void @func_other_fi_user_i32() #0 {
; GCN: v_mov_b32_e32 v1, 15{{$}}
; MUBUF: buffer_store_dword v1, v0, s[0:3], 0 offen{{$}}
; GFX9-FLATSCR: scratch_store_dword v0, v1, off{{$}}
-define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) nounwind {
store volatile i32 15, ptr addrspace(5) %ptr
ret void
}
@@ -112,7 +112,7 @@ define void @func_store_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; GCN: s_waitcnt
; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc{{$}}
; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off glc{{$}}
-define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
+define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) nounwind {
%val = load volatile i32, ptr addrspace(5) %ptr
ret void
}
@@ -131,7 +131,7 @@ define void @func_load_private_arg_i32_ptr(ptr addrspace(5) %ptr) #0 {
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
-define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32 }) %arg0) nounwind {
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
%load1 = load i32, ptr addrspace(5) %gep1
@@ -145,7 +145,7 @@ define void @void_func_byval_struct_i8_i32_ptr(ptr addrspace(5) byval({ i8, i32
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
; GFX9-FLATSCR-NEXT: scratch_load_ubyte v0, off, s32
; GFX9-FLATSCR-NEXT: scratch_load_dword v1, off, s32 offset:4
-define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8, i32 }) %arg0) nounwind {
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
%load0 = load i8, ptr addrspace(5) %gep0
@@ -172,7 +172,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
-define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
+define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) nounwind {
%cmp = icmp eq i32 %arg2, 0
br i1 %cmp, label %bb, label %ret
@@ -202,7 +202,7 @@ ret:
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
; GCN: ds_write_b32 v0, [[VZ]]
-define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
+define void @func_other_fi_user_non_inline_imm_offset_i32() nounwind {
%alloca0 = alloca [128 x i32], align 4, addrspace(5)
%alloca1 = alloca [8 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca0, i32 0, i32 65
@@ -227,7 +227,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
; GCN: ds_write_b32 v0, [[VZ]]
-define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
+define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() nounwind {
%alloca0 = alloca [128 x i32], align 4, addrspace(5)
%alloca1 = alloca [8 x i32], align 4, addrspace(5)
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
@@ -240,7 +240,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
ret void
}
-declare void @func(ptr addrspace(5) nocapture) #0
+declare void @func(ptr addrspace(5) nocapture) nounwind
; undef flag not preserved in eliminateFrameIndex when handling the
; stores in the middle block.
@@ -255,7 +255,7 @@ declare void @func(ptr addrspace(5) nocapture) #0
; FLATSCR: scratch_store_dword v0, off, s33 offset:
; FLATSCR: scratch_store_dword v0, off, s33 offset:
; FLATSCR: scratch_store_dword v{{[0-9]+}}, off, s33 offset:
-define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
+define void @undefined_stack_store_reg(float %arg, i32 %arg1) nounwind {
bb:
%tmp = alloca <4 x float>, align 16, addrspace(5)
%tmp2 = insertelement <4 x float> undef, float %arg, i32 0
@@ -288,7 +288,7 @@ bb5:
; GFX9-FLATSCR-NEXT: v_or_b32_e32 [[PTR:v[0-9]+]], 4, [[SP]]
; GCN: ds_write_b32 v{{[0-9]+}}, [[PTR]]
-define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
+define void @alloca_ptr_nonentry_block(i32 %arg0) nounwind {
%alloca0 = alloca { i8, i32 }, align 8, addrspace(5)
%cmp = icmp eq i32 %arg0, 0
br i1 %cmp, label %bb, label %ret
@@ -331,5 +331,3 @@ entry:
store i16 %scratch0.load, ptr addrspace(3) %addr1, align 2
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-entry-all-sgpr-used.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-entry-all-sgpr-used.mir
index d8542bd0756796..649f8e81cacb55 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-lowering-entry-all-sgpr-used.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-entry-all-sgpr-used.mir
@@ -3,10 +3,9 @@
# CHECK-LABEL: all_sgpr_used
# CHECK: V_CMP_LT_U32_e64
--- |
- define amdgpu_kernel void @all_sgpr_used() #0 {
+ define amdgpu_kernel void @all_sgpr_used() "amdgpu-num-sgpr"="8" "frame-pointer"="all" {
ret void
}
- attributes #0 = { "amdgpu-num-sgpr"="8" "frame-pointer"="all"}
...
---
name: all_sgpr_used
diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
index b2ba63cc5c14f3..eb9ce36de33ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
@@ -5,11 +5,9 @@
# CHECK: BUFFER_STORE_DWORD_OFFSET
--- |
- define amdgpu_kernel void @foo() #0 {
+ define amdgpu_kernel void @foo() "frame-pointer"="all" {
ret void
}
-
- attributes #0 = { "frame-pointer"="all" }
...
---
name: foo
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 3922b5404d7786..c9e192703f6446 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -4,9 +4,9 @@
; Check frame setup where SGPR spills to VGPRs are disabled or enabled.
-declare hidden void @external_void_func_void() #0
+declare hidden void @external_void_func_void() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
-define void @callee_with_stack_and_call() #0 {
+define void @callee_with_stack_and_call() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
; SPILL-TO-VGPR-LABEL: callee_with_stack_and_call:
; SPILL-TO-VGPR: ; %bb.0:
; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -97,5 +97,3 @@ define void @callee_with_stack_and_call() #0 {
call void @external_void_func_void()
ret void
}
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 0d59021b69019f..2f2e269a3e7413 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -206,7 +206,7 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
%r0 = load half, ptr addrspace(1) %in1, align 4
%r1 = load half, ptr addrspace(1) %gep2, align 4
@@ -375,7 +375,7 @@ define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1)
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
%r0 = load half, ptr addrspace(1) %in1, align 4
%r1 = load half, ptr addrspace(1) %gep2, align 4
@@ -544,7 +544,7 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #1 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
%r0 = load half, ptr addrspace(1) %in1, align 4
%r1 = load half, ptr addrspace(1) %gep2, align 4
@@ -790,7 +790,7 @@ define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
%r0 = load float, ptr addrspace(1) %in1, align 4
%r1 = load float, ptr addrspace(1) %gep2, align 4
@@ -951,7 +951,7 @@ define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1)
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
%r0 = load float, ptr addrspace(1) %in1, align 4
%r1 = load float, ptr addrspace(1) %gep2, align 4
@@ -1112,7 +1112,7 @@ define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #1 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
%r0 = load float, ptr addrspace(1) %in1, align 4
%r1 = load float, ptr addrspace(1) %gep2, align 4
@@ -1368,7 +1368,7 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%r0 = load double, ptr addrspace(1) %in1, align 8
%r1 = load double, ptr addrspace(1) %in2, align 8
%r2 = frem double %r0, %r1
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1)
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%r0 = load double, ptr addrspace(1) %in1, align 8
%r1 = load double, ptr addrspace(1) %in2, align 8
%r2 = frem fast double %r0, %r1
@@ -1818,7 +1818,7 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #1 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%r0 = load double, ptr addrspace(1) %in1, align 8
%r1 = load double, ptr addrspace(1) %in2, align 8
%r2 = frem afn double %r0, %r1
@@ -2127,7 +2127,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr <2 x half>, ptr addrspace(1) %in2, i32 4
%r0 = load <2 x half>, ptr addrspace(1) %in1, align 8
%r1 = load <2 x half>, ptr addrspace(1) %gep2, align 8
@@ -2613,7 +2613,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr <4 x half>, ptr addrspace(1) %in2, i32 4
%r0 = load <4 x half>, ptr addrspace(1) %in1, align 16
%r1 = load <4 x half>, ptr addrspace(1) %gep2, align 16
@@ -2977,7 +2977,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr <2 x float>, ptr addrspace(1) %in2, i32 4
%r0 = load <2 x float>, ptr addrspace(1) %in1, align 8
%r1 = load <2 x float>, ptr addrspace(1) %gep2, align 8
@@ -3577,7 +3577,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr <4 x float>, ptr addrspace(1) %in2, i32 4
%r0 = load <4 x float>, ptr addrspace(1) %in1, align 16
%r1 = load <4 x float>, ptr addrspace(1) %gep2, align 16
@@ -3961,7 +3961,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_nop 0
; GFX1150-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1150-NEXT: s_endpgm
- ptr addrspace(1) %in2) #0 {
+ ptr addrspace(1) %in2) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%gep2 = getelementptr <2 x double>, ptr addrspace(1) %in2, i32 4
%r0 = load <2 x double>, ptr addrspace(1) %in1, align 16
%r1 = load <2 x double>, ptr addrspace(1) %gep2, align 16
@@ -3969,6 +3969,3 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
store <2 x double> %r2, ptr addrspace(1) %out, align 16
ret void
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index 1d86fbc5695b74..a25da89ea2969b 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -489,7 +489,7 @@ define float @v_sqrt_f32_ninf(float %x) {
ret float %result
}
-define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
+define float @v_sqrt_f32_no_infs_attribute(float %x) "no-infs-fp-math"="true" {
; SDAG-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
; SDAG-IEEE: ; %bb.0:
; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1274,7 +1274,7 @@ define float @v_sqrt_f32_afn_nnan_ninf_nsz(float %x) {
ret float %result
}
-define float @v_sqrt_f32__approx_func_fp_math(float %x) #2 {
+define float @v_sqrt_f32__approx_func_fp_math(float %x) "approx-func-fp-math"="true" {
; GCN-LABEL: v_sqrt_f32__approx_func_fp_math:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1284,7 +1284,7 @@ define float @v_sqrt_f32__approx_func_fp_math(float %x) #2 {
ret float %result
}
-define float @v_sqrt_f32__enough_unsafe_attrs(float %x) #3 {
+define float @v_sqrt_f32__enough_unsafe_attrs(float %x) "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" {
; GCN-LABEL: v_sqrt_f32__enough_unsafe_attrs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1294,7 +1294,7 @@ define float @v_sqrt_f32__enough_unsafe_attrs(float %x) #3 {
ret float %result
}
-define float @v_sqrt_f32__unsafe_attr(float %x) #4 {
+define float @v_sqrt_f32__unsafe_attr(float %x) "unsafe-fp-math"="true" {
; GCN-LABEL: v_sqrt_f32__unsafe_attr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4780,21 +4780,14 @@ entry:
ret void
}
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.sqrt.f32(float) #0
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) #0
-declare i32 @llvm.amdgcn.readfirstlane(i32) #1
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.sqrt.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.readfirstlane(i32) convergent nounwind willreturn memory(none)
-declare { float, i32 } @llvm.frexp.f32.i32(float) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { convergent nounwind willreturn memory(none) }
-attributes #2 = { "approx-func-fp-math"="true" }
-attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
-attributes #4 = { "unsafe-fp-math"="true" }
-attributes #5 = { "no-infs-fp-math"="true" }
+declare { float, i32 } @llvm.frexp.f32.i32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!0 = !{float 0.5}
!1 = !{float 1.0}
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 932b10f14780b1..7670178f5d2b2d 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1446,7 +1446,7 @@ define double @v_sqrt_f64_afn_nnan_ninf_nsz(double %x) {
ret double %result
}
-define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
+define double @v_sqrt_f64__approx_func_fp_math(double %x) "approx-func-fp-math"="true" {
; SDAG-LABEL: v_sqrt_f64__approx_func_fp_math:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1506,7 +1506,7 @@ define double @v_sqrt_f64__approx_func_fp_math(double %x) #2 {
ret double %result
}
-define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
+define double @v_sqrt_f64__enough_unsafe_attrs(double %x) "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" {
; SDAG-LABEL: v_sqrt_f64__enough_unsafe_attrs:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1566,7 +1566,7 @@ define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
ret double %result
}
-define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
+define double @v_sqrt_f64__unsafe_attr(double %x) "unsafe-fp-math"="true" {
; SDAG-LABEL: v_sqrt_f64__unsafe_attr:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1864,14 +1864,8 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
ret <3 x double> %result
}
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.sqrt.f64(double) #0
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
-declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0
-declare i32 @llvm.amdgcn.readfirstlane(i32) #1
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #1 = { convergent nounwind willreturn memory(none) }
-attributes #2 = { "approx-func-fp-math"="true" }
-attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
-attributes #4 = { "unsafe-fp-math"="true" }
+declare double @llvm.fabs.f64(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.sqrt.f64(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.readfirstlane(i32) convergent nounwind willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 85286841cbcac9..93bf36735b877f 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -5,7 +5,7 @@
; Test that fneg is folded into source modifiers when it wasn't
; possible to fold fsub to fneg without context.
-define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1) #0 {
+define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_pos1:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17,7 +17,7 @@ define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1
ret float %mul
}
-define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1) #1 {
+define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_pos1:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +29,7 @@ define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1)
ret float %mul
}
-define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float %v1) #0 {
+define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -48,7 +48,7 @@ define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 {
+define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -66,7 +66,7 @@ define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 {
+define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -84,7 +84,7 @@ define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1
ret float %mul
}
-define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 {
+define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -103,7 +103,7 @@ define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -121,7 +121,7 @@ define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -139,7 +139,7 @@ define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) #0 {
+define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -157,7 +157,7 @@ define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) #0
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 {
+define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -175,7 +175,7 @@ define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,7 +193,7 @@ define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2
ret float %mul
}
-define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 {
+define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,7 +211,7 @@ define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1)
ret float %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2 x float> %v1) #0 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -232,7 +232,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2
ret <2 x float> %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -253,7 +253,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x
ret <2 x float> %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0, <2 x float> %v1) #0 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -274,7 +274,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0,
ret <2 x float> %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -295,7 +295,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0,
ret <2 x float> %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -316,7 +316,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0,
ret <2 x float> %mul
}
-define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 {
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -338,7 +338,7 @@ define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %
}
-define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) #0 {
+define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -356,7 +356,7 @@ define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) #0 {
ret half %mul
}
-define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {
+define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -374,7 +374,7 @@ define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {
ret half %mul
}
-define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) #0 {
+define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -392,7 +392,7 @@ define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) #0 {
ret half %mul
}
-define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {
+define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -410,7 +410,7 @@ define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {
ret half %mul
}
-define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {
+define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -428,7 +428,7 @@ define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {
ret half %mul
}
-define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 {
+define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -446,7 +446,7 @@ define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2
ret half %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x half> %v1) #0 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -464,7 +464,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x
ret <2 x half> %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x half> %v1) #1 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -482,7 +482,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x h
ret <2 x half> %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <2 x half> %v1) #0 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -500,7 +500,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <
ret <2 x half> %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2 x half> %v1) #1 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -518,7 +518,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2
ret <2 x half> %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2 x half> %v1) #2 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -536,7 +536,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2
ret <2 x half> %mul
}
-define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0, <2 x half> %v1) #2 {
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -554,7 +554,7 @@ define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0
ret <2 x half> %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) #0 {
+define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -572,7 +572,7 @@ define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) #0
ret double %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) #1 {
+define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -590,7 +590,7 @@ define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) #1 {
ret double %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1) #0 {
+define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -608,7 +608,7 @@ define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1)
ret double %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1) #1 {
+define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -626,7 +626,7 @@ define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1)
ret double %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1) #2 {
+define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -644,7 +644,7 @@ define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1)
ret double %mul
}
-define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %v1) #2 {
+define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -662,7 +662,7 @@ define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %
ret double %mul
}
-define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float %v0, float %v1) #0 {
+define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float %v0, float %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -684,7 +684,7 @@ define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float
ret float %mul
}
-define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, float %v0, float %v1) #1 {
+define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -707,7 +707,7 @@ define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, floa
ret float %mul
}
-define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, float %v0, float %v1) #2 {
+define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, float %v0, float %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -730,7 +730,7 @@ define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond,
ret float %mul
}
-define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v0, half %v1) #0 {
+define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v0, half %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -753,7 +753,7 @@ define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v
ret half %mul
}
-define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half %v0, half %v1) #1 {
+define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half %v0, half %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -776,7 +776,7 @@ define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half
ret half %mul
}
-define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, half %v0, half %v1) #2 {
+define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, half %v0, half %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -799,7 +799,7 @@ define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, h
ret half %mul
}
-define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, double %v0, double %v1) #0 {
+define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, double %v0, double %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -824,7 +824,7 @@ define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, doubl
ret double %mul
}
-define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, double %v0, double %v1) #1 {
+define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, double %v0, double %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -849,7 +849,7 @@ define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, dou
ret double %mul
}
-define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, double %v0, double %v1) #2 {
+define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, double %v0, double %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -874,7 +874,7 @@ define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond,
ret double %mul
}
-define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, <2 x half> %v0, <2 x half> %v1) #0 {
+define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, <2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -897,7 +897,7 @@ define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond,
ret <2 x half> %mul
}
-define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, <2 x half> %v0, <2 x half> %v1) #1 {
+define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, <2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -920,7 +920,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %con
ret <2 x half> %mul
}
-define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, <2 x half> %v0, <2 x half> %v1) #2 {
+define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, <2 x half> %v0, <2 x half> %v1) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -943,7 +943,7 @@ define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1
ret <2 x half> %mul
}
-define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) "denormal-fp-math"="ieee,ieee" strictfp {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -955,7 +955,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1)
ret float %mul
}
-define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) "denormal-fp-math"="preserve-sign,preserve-sign" strictfp {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -967,7 +967,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1)
ret float %mul
}
-define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
+define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) "denormal-fp-math"="dynamic,dynamic" strictfp {
; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -979,7 +979,7 @@ define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %
ret float %mul
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) #0 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -999,7 +999,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) #0 {
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) #1 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1020,7 +1020,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) #1 {
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) #2 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1041,7 +1041,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) #
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0) #0 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1063,7 +1063,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0)
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) #1 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1086,7 +1086,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) #
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v0) #2 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v0) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1109,7 +1109,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %testmask) #0 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %testmask) "denormal-fp-math"="ieee,ieee" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1129,7 +1129,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %te
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %testmask) #1 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %testmask) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1150,7 +1150,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %tes
ret i1 %class
}
-define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32 %testmask) #2 {
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32 %testmask) "denormal-fp-math"="dynamic,dynamic" {
; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1171,7 +1171,7 @@ define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32
ret i1 %class
}
-define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %testmask) #1 {
+define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %testmask) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1191,7 +1191,7 @@ define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %te
ret i1 %class
}
-define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %testmask) #1 {
+define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %testmask) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1212,7 +1212,7 @@ define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %test
ret i1 %class
}
-define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) #1 {
+define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1234,7 +1234,7 @@ define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) #1 {
ret i1 %class
}
-define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) #1 {
+define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1257,7 +1257,7 @@ define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) #1 {
ret i1 %class
}
-define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %v1) #1 {
+define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %v1) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1280,7 +1280,7 @@ define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0,
ret float %p0_0
}
-define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 {
+define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) "denormal-fp-math"="preserve-sign,preserve-sign" {
; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1312,10 +1312,3 @@ declare i1 @llvm.is.fpclass.f16(half, i32 immarg)
declare i1 @llvm.amdgcn.class.f16(half, i32)
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
-
-attributes #0 = { "denormal-fp-math"="ieee,ieee" }
-attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
-attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
-attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
-attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 9c00df936a7464..669e38dbb87cfc 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -95,7 +95,7 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="true" {
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%a = load float, ptr addrspace(1) %in, align 4
%b = load float, ptr addrspace(1) %b_ptr, align 4
@@ -111,7 +111,7 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%a = load float, ptr addrspace(1) %in, align 4
%b = load float, ptr addrspace(1) %b_ptr, align 4
@@ -123,12 +123,9 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1)
; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="true" {
%a = load float, ptr addrspace(1) %in, align 4
%result = fsub float %a, 0.0
store float %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsub64.ll b/llvm/test/CodeGen/AMDGPU/fsub64.ll
index dd2c8746ca81ed..95569b894e1077 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub64.ll
@@ -1,7 +1,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare double @llvm.fabs.f64(double) #0
+declare double @llvm.fabs.f64(double) nounwind readnone
; SI-LABEL: {{^}}fsub_f64:
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
@@ -20,7 +20,7 @@ define amdgpu_kernel void @fsub_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1)
ptr addrspace(1) %in2) {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
- %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
+ %r1.fabs = call double @llvm.fabs.f64(double %r1) nounwind readnone
%r2 = fsub double %r0, %r1.fabs
store double %r2, ptr addrspace(1) %out
ret void
@@ -32,7 +32,7 @@ define amdgpu_kernel void @fsub_fabs_inv_f64(ptr addrspace(1) %out, ptr addrspac
ptr addrspace(1) %in2) {
%r0 = load double, ptr addrspace(1) %in1
%r1 = load double, ptr addrspace(1) %in2
- %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
+ %r0.fabs = call double @llvm.fabs.f64(double %r0) nounwind readnone
%r2 = fsub double %r0.fabs, %r1
store double %r2, ptr addrspace(1) %out
ret void
@@ -103,5 +103,3 @@ define amdgpu_kernel void @s_fsub_v4f64(ptr addrspace(1) %out, <4 x double> %a,
store <4 x double> %result, ptr addrspace(1) %out, align 16
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
index 27845b6b5b2fee..557800bd4e03f9 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
-define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
+define void @void_func_i1_inreg(i1 inreg %arg0) nounwind {
; GFX9-LABEL: void_func_i1_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -24,7 +24,7 @@ define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
ret void
}
-define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
+define void @void_func_i8_inreg(i8 inreg %arg0) nounwind {
; GFX9-LABEL: void_func_i8_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -43,7 +43,7 @@ define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
ret void
}
-define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
+define void @void_func_i16_inreg(i16 inreg %arg0) nounwind {
; GFX9-LABEL: void_func_i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,7 +62,7 @@ define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
ret void
}
-define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
+define void @void_func_i32_inreg(i32 inreg %arg0) nounwind {
; GFX9-LABEL: void_func_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -81,7 +81,7 @@ define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
ret void
}
-define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
+define void @void_func_i64_inreg(i64 inreg %arg0) nounwind {
; GFX9-LABEL: void_func_i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -101,7 +101,7 @@ define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
ret void
}
-define void @void_func_f16_inreg(half inreg %arg0) #0 {
+define void @void_func_f16_inreg(half inreg %arg0) nounwind {
; GFX9-LABEL: void_func_f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,7 +120,7 @@ define void @void_func_f16_inreg(half inreg %arg0) #0 {
ret void
}
-define void @void_func_f32_inreg(float inreg %arg0) #0 {
+define void @void_func_f32_inreg(float inreg %arg0) nounwind {
; GFX9-LABEL: void_func_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -139,7 +139,7 @@ define void @void_func_f32_inreg(float inreg %arg0) #0 {
ret void
}
-define void @void_func_f64_inreg(double inreg %arg0) #0 {
+define void @void_func_f64_inreg(double inreg %arg0) nounwind {
; GFX9-LABEL: void_func_f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -159,7 +159,7 @@ define void @void_func_f64_inreg(double inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
+define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -178,7 +178,7 @@ define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
+define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -201,7 +201,7 @@ define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
+define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) nounwind {
; GFX89-LABEL: void_func_v4i16_inreg:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -230,7 +230,7 @@ define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
+define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v5i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -255,7 +255,7 @@ define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
+define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,7 +278,7 @@ define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
+define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -298,7 +298,7 @@ define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
+define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -320,7 +320,7 @@ define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
+define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,7 +343,7 @@ define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
+define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v5i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -371,7 +371,7 @@ define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
+define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -404,7 +404,7 @@ define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
+define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) nounwind {
; CIGFX89-LABEL: void_func_v16i32_inreg:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -484,7 +484,7 @@ define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
+define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -567,7 +567,7 @@ define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
+define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -590,7 +590,7 @@ define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
+define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -619,7 +619,7 @@ define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
+define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -652,7 +652,7 @@ define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
+define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v5i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -691,7 +691,7 @@ define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
+define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -742,7 +742,7 @@ define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
+define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v16i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -825,7 +825,7 @@ define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
+define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -844,7 +844,7 @@ define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
+define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -867,7 +867,7 @@ define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
+define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -887,7 +887,7 @@ define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
+define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -910,7 +910,7 @@ define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
+define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v16f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -943,7 +943,7 @@ define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
+define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -963,7 +963,7 @@ define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
+define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -985,7 +985,7 @@ define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
+define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1008,7 +1008,7 @@ define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
+define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1041,7 +1041,7 @@ define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
+define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v16f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1092,7 +1092,7 @@ define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
ret void
}
-define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
+define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1115,7 +1115,7 @@ define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
+define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1144,7 +1144,7 @@ define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
+define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1177,7 +1177,7 @@ define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
+define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1228,7 +1228,7 @@ define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
+define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v16f64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1311,7 +1311,7 @@ define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
ret void
}
-define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 {
+define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) nounwind {
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1423,7 +1423,7 @@ define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inr
ret void
}
-define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 {
+define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) nounwind {
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1751,7 +1751,7 @@ define void @too_many_args_use_workitem_id_x_inreg(
ret void
}
-define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
+define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) nounwind {
; GFX9-LABEL: void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1777,7 +1777,7 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg
ret void
}
-define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
+define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) nounwind {
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1870,7 +1870,7 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr
ret void
}
-define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
+define void @void_func_bf16_inreg(bfloat inreg %arg0) nounwind {
; GFX9-LABEL: void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1889,7 +1889,7 @@ define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
ret void
}
-define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
+define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1908,7 +1908,7 @@ define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
ret void
}
-define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
+define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v3bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1931,7 +1931,7 @@ define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
ret void
}
-define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
+define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v4bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1951,7 +1951,7 @@ define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
ret void
}
-define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
+define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v8bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1974,7 +1974,7 @@ define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
ret void
}
-define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
+define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) nounwind {
; GFX9-LABEL: void_func_v16bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2340,9 +2340,6 @@ define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addr
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
-
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index db89ad66ffab0d..edd7d319de122d 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define void @void_func_i1(i1 %arg0) #0 {
+define void @void_func_i1(i1 %arg0) nounwind {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,7 +27,7 @@ define void @void_func_i1(i1 %arg0) #0 {
ret void
}
-define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
+define void @void_func_i1_zeroext(i1 zeroext %arg0) nounwind {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -52,7 +52,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i1_signext(i1 signext %arg0) #0 {
+define void @void_func_i1_signext(i1 signext %arg0) nounwind {
; CI-LABEL: void_func_i1_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -97,7 +97,7 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
ret void
}
-define void @i1_arg_i1_use(i1 %arg) #0 {
+define void @i1_arg_i1_use(i1 %arg) nounwind {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89: ; %bb.0: ; %bb
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -146,7 +146,7 @@ bb2:
ret void
}
-define void @void_func_i8(i8 %arg0) #0 {
+define void @void_func_i8(i8 %arg0) nounwind {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -167,7 +167,7 @@ define void @void_func_i8(i8 %arg0) #0 {
ret void
}
-define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
+define void @void_func_i8_zeroext(i8 zeroext %arg0) nounwind {
; CI-LABEL: void_func_i8_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -212,7 +212,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i8_signext(i8 signext %arg0) #0 {
+define void @void_func_i8_signext(i8 signext %arg0) nounwind {
; CI-LABEL: void_func_i8_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -257,7 +257,7 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
ret void
}
-define void @void_func_i16(i16 %arg0) #0 {
+define void @void_func_i16(i16 %arg0) nounwind {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -278,7 +278,7 @@ define void @void_func_i16(i16 %arg0) #0 {
ret void
}
-define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
+define void @void_func_i16_zeroext(i16 zeroext %arg0) nounwind {
; CI-LABEL: void_func_i16_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -323,7 +323,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
ret void
}
-define void @void_func_i16_signext(i16 signext %arg0) #0 {
+define void @void_func_i16_signext(i16 signext %arg0) nounwind {
; CI-LABEL: void_func_i16_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -368,7 +368,7 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {
ret void
}
-define void @void_func_i32(i32 %arg0) #0 {
+define void @void_func_i32(i32 %arg0) nounwind {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -389,7 +389,7 @@ define void @void_func_i32(i32 %arg0) #0 {
ret void
}
-define void @void_func_i64(i64 %arg0) #0 {
+define void @void_func_i64(i64 %arg0) nounwind {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -410,7 +410,7 @@ define void @void_func_i64(i64 %arg0) #0 {
ret void
}
-define void @void_func_f16(half %arg0) #0 {
+define void @void_func_f16(half %arg0) nounwind {
; CI-LABEL: void_func_f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -441,7 +441,7 @@ define void @void_func_f16(half %arg0) #0 {
ret void
}
-define void @void_func_f32(float %arg0) #0 {
+define void @void_func_f32(float %arg0) nounwind {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -462,7 +462,7 @@ define void @void_func_f32(float %arg0) #0 {
ret void
}
-define void @void_func_f64(double %arg0) #0 {
+define void @void_func_f64(double %arg0) nounwind {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -483,7 +483,7 @@ define void @void_func_f64(double %arg0) #0 {
ret void
}
-define void @void_func_v2i32(<2 x i32> %arg0) #0 {
+define void @void_func_v2i32(<2 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -504,7 +504,7 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v3i32(<3 x i32> %arg0) #0 {
+define void @void_func_v3i32(<3 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -525,7 +525,7 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v4i32(<4 x i32> %arg0) #0 {
+define void @void_func_v4i32(<4 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -546,7 +546,7 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v5i32(<5 x i32> %arg0) #0 {
+define void @void_func_v5i32(<5 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -570,7 +570,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v8i32(<8 x i32> %arg0) #0 {
+define void @void_func_v8i32(<8 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -594,7 +594,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v16i32(<16 x i32> %arg0) #0 {
+define void @void_func_v16i32(<16 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -622,7 +622,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v32i32(<32 x i32> %arg0) #0 {
+define void @void_func_v32i32(<32 x i32> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -664,7 +664,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {
}
; 1 over register limit
-define void @void_func_v33i32(<33 x i32> %arg0) #0 {
+define void @void_func_v33i32(<33 x i32> %arg0) nounwind {
; CI-LABEL: void_func_v33i32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -756,7 +756,7 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 {
ret void
}
-define void @void_func_v2i64(<2 x i64> %arg0) #0 {
+define void @void_func_v2i64(<2 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -777,7 +777,7 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v3i64(<3 x i64> %arg0) #0 {
+define void @void_func_v3i64(<3 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v3i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -801,7 +801,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v4i64(<4 x i64> %arg0) #0 {
+define void @void_func_v4i64(<4 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v4i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -825,7 +825,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v5i64(<5 x i64> %arg0) #0 {
+define void @void_func_v5i64(<5 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v5i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -851,7 +851,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v8i64(<8 x i64> %arg0) #0 {
+define void @void_func_v8i64(<8 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v8i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -879,7 +879,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v16i64(<16 x i64> %arg0) #0 {
+define void @void_func_v16i64(<16 x i64> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v16i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -920,7 +920,7 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 {
ret void
}
-define void @void_func_v2i8(<2 x i8> %arg0) #0 {
+define void @void_func_v2i8(<2 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v2i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -964,7 +964,7 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v2i16(<2 x i16> %arg0) #0 {
+define void @void_func_v2i16(<2 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v2i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -997,7 +997,7 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v3i8(<3 x i8> %arg0) #0 {
+define void @void_func_v3i8(<3 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v3i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1047,7 +1047,7 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v4i8(<4 x i8> %arg0) #0 {
+define void @void_func_v4i8(<4 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v4i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1108,7 +1108,7 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v5i8(<5 x i8> %arg0) #0 {
+define void @void_func_v5i8(<5 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v5i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1174,7 +1174,7 @@ define void @void_func_v5i8(<5 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v8i8(<8 x i8> %arg0) #0 {
+define void @void_func_v8i8(<8 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v8i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1257,7 +1257,7 @@ define void @void_func_v8i8(<8 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v16i8(<16 x i8> %arg0) #0 {
+define void @void_func_v16i8(<16 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v16i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1385,7 +1385,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v32i8(<32 x i8> %arg0) #0 {
+define void @void_func_v32i8(<32 x i8> %arg0) nounwind {
; CI-LABEL: void_func_v32i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1619,7 +1619,7 @@ define void @void_func_v32i8(<32 x i8> %arg0) #0 {
ret void
}
-define void @void_func_v3i16(<3 x i16> %arg0) #0 {
+define void @void_func_v3i16(<3 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v3i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1656,7 +1656,7 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v4i16(<4 x i16> %arg0) #0 {
+define void @void_func_v4i16(<4 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v4i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1692,7 +1692,7 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v5i16(<5 x i16> %arg0) #0 {
+define void @void_func_v5i16(<5 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v5i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1732,7 +1732,7 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v8i16(<8 x i16> %arg0) #0 {
+define void @void_func_v8i16(<8 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v8i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1774,7 +1774,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v16i16(<16 x i16> %arg0) #0 {
+define void @void_func_v16i16(<16 x i16> %arg0) nounwind {
; CI-LABEL: void_func_v16i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1832,7 +1832,7 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 {
ret void
}
-define void @void_func_v2i24(<2 x i24> %arg0) #0 {
+define void @void_func_v2i24(<2 x i24> %arg0) nounwind {
; CI-LABEL: void_func_v2i24:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1888,7 +1888,7 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 {
ret void
}
-define void @void_func_v2f32(<2 x float> %arg0) #0 {
+define void @void_func_v2f32(<2 x float> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v2f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1909,7 +1909,7 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 {
ret void
}
-define void @void_func_v3f32(<3 x float> %arg0) #0 {
+define void @void_func_v3f32(<3 x float> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v3f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1930,7 +1930,7 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 {
ret void
}
-define void @void_func_v4f32(<4 x float> %arg0) #0 {
+define void @void_func_v4f32(<4 x float> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v4f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1951,7 +1951,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 {
ret void
}
-define void @void_func_v8f32(<8 x float> %arg0) #0 {
+define void @void_func_v8f32(<8 x float> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v8f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1975,7 +1975,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 {
ret void
}
-define void @void_func_v16f32(<16 x float> %arg0) #0 {
+define void @void_func_v16f32(<16 x float> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v16f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2003,7 +2003,7 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 {
ret void
}
-define void @void_func_v2f64(<2 x double> %arg0) #0 {
+define void @void_func_v2f64(<2 x double> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v2f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2024,7 +2024,7 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 {
ret void
}
-define void @void_func_v3f64(<3 x double> %arg0) #0 {
+define void @void_func_v3f64(<3 x double> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v3f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2048,7 +2048,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 {
ret void
}
-define void @void_func_v4f64(<4 x double> %arg0) #0 {
+define void @void_func_v4f64(<4 x double> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v4f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2072,7 +2072,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 {
ret void
}
-define void @void_func_v8f64(<8 x double> %arg0) #0 {
+define void @void_func_v8f64(<8 x double> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v8f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2100,7 +2100,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 {
ret void
}
-define void @void_func_v16f64(<16 x double> %arg0) #0 {
+define void @void_func_v16f64(<16 x double> %arg0) nounwind {
; CIGFX89-LABEL: void_func_v16f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2141,7 +2141,7 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 {
ret void
}
-define void @void_func_v2f16(<2 x half> %arg0) #0 {
+define void @void_func_v2f16(<2 x half> %arg0) nounwind {
; CI-LABEL: void_func_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2176,7 +2176,7 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 {
}
; FIXME: Different abi if f16 legal
-define void @void_func_v3f16(<3 x half> %arg0) #0 {
+define void @void_func_v3f16(<3 x half> %arg0) nounwind {
; CI-LABEL: void_func_v3f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2215,7 +2215,7 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
ret void
}
-define void @void_func_v4f16(<4 x half> %arg0) #0 {
+define void @void_func_v4f16(<4 x half> %arg0) nounwind {
; CI-LABEL: void_func_v4f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2253,7 +2253,7 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 {
ret void
}
-define void @void_func_v8f16(<8 x half> %arg0) #0 {
+define void @void_func_v8f16(<8 x half> %arg0) nounwind {
; CI-LABEL: void_func_v8f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2299,7 +2299,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 {
ret void
}
-define void @void_func_v16f16(<16 x half> %arg0) #0 {
+define void @void_func_v16f16(<16 x half> %arg0) nounwind {
; CI-LABEL: void_func_v16f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2366,7 +2366,7 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 {
}
; Make sure there is no alignment requirement for passed vgprs.
-define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
+define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) nounwind {
; CIGFX89-LABEL: void_func_i32_i64_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2398,7 +2398,7 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
ret void
}
-define void @void_func_struct_i32({ i32 } %arg0) #0 {
+define void @void_func_struct_i32({ i32 } %arg0) nounwind {
; CIGFX89-LABEL: void_func_struct_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2419,7 +2419,7 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 {
ret void
}
-define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
+define void @void_func_struct_i8_i32({ i8, i32 } %arg0) nounwind {
; CIGFX89-LABEL: void_func_struct_i8_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2443,7 +2443,7 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
ret void
}
-define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
+define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) nounwind {
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2476,7 +2476,7 @@ define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %
ret void
}
-define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
+define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) nounwind {
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2586,7 +2586,7 @@ define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }
ret void
}
-define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
+define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) nounwind {
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2622,7 +2622,7 @@ define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, pt
ret void
}
-define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
+define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: void_func_v32i32_i32_i64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2761,7 +2761,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0
}
; FIXME: Different ext load types on CI vs. VI
-define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
+define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) nounwind {
; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2945,7 +2945,7 @@ define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg
ret void
}
-define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
+define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) nounwind {
; CI-LABEL: void_func_v32i32_v2i32_v2f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3087,7 +3087,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2
ret void
}
-define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
+define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) nounwind {
; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3284,7 +3284,7 @@ define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i
ret void
}
-define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
+define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) nounwind {
; CI-LABEL: void_func_v32i32_v2i64_v2f64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3443,7 +3443,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2
ret void
}
-define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
+define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) nounwind {
; CI-LABEL: void_func_v32i32_v4i32_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3602,7 +3602,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4
ret void
}
-define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
+define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) nounwind {
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3813,7 +3813,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
ret void
}
-define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
+define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) nounwind {
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4129,7 +4129,7 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1,
}
; Make sure v3 isn't a wasted register because of v3 types being promoted to v4
-define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
+define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) nounwind {
; CI-LABEL: void_func_v3f32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4181,7 +4181,7 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
ret void
}
-define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
+define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) nounwind {
; CI-LABEL: void_func_v3i32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4234,7 +4234,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
}
; Check there is no crash.
-define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
+define void @void_func_volatile_v16i8(<16 x i8> %arg0) nounwind {
; CIGFX89-LABEL: void_func_volatile_v16i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4317,7 +4317,7 @@ define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
}
; Check there is no crash.
-define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
+define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) nounwind {
; CI-LABEL: void_func_v32i32_v16i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4636,7 +4636,7 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
}
-define void @void_func_bf16(bfloat %arg0) #0 {
+define void @void_func_bf16(bfloat %arg0) nounwind {
; CI-LABEL: void_func_bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4668,7 +4668,7 @@ define void @void_func_bf16(bfloat %arg0) #0 {
ret void
}
-define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
+define void @void_func_v2bf16(<2 x bfloat> %arg0) nounwind {
; CI-LABEL: void_func_v2bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4702,7 +4702,7 @@ define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
ret void
}
-define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
+define void @void_func_v3bf16(<3 x bfloat> %arg0) nounwind {
; CI-LABEL: void_func_v3bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4742,7 +4742,7 @@ define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
ret void
}
-define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
+define void @void_func_v4bf16(<4 x bfloat> %arg0) nounwind {
; CI-LABEL: void_func_v4bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4780,7 +4780,7 @@ define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
ret void
}
-define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
+define void @void_func_v8bf16(<8 x bfloat> %arg0) nounwind {
; CI-LABEL: void_func_v8bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4826,7 +4826,7 @@ define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
ret void
}
-define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
+define void @void_func_v16bf16(<16 x bfloat> %arg0) nounwind {
; CI-LABEL: void_func_v16bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4891,5 +4891,3 @@ define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
store <16 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index acadee27981710..540851846f658a 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define i1 @i1_func_void() #0 {
+define i1 @i1_func_void() nounwind {
; GFX789-LABEL: i1_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -27,7 +27,7 @@ define i1 @i1_func_void() #0 {
}
; FIXME: Missing and?
-define zeroext i1 @i1_zeroext_func_void() #0 {
+define zeroext i1 @i1_zeroext_func_void() nounwind {
; GFX789-LABEL: i1_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,7 +49,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 {
ret i1 %val
}
-define signext i1 @i1_signext_func_void() #0 {
+define signext i1 @i1_signext_func_void() nounwind {
; GFX789-LABEL: i1_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -73,7 +73,7 @@ define signext i1 @i1_signext_func_void() #0 {
ret i1 %val
}
-define i8 @i8_func_void() #0 {
+define i8 @i8_func_void() nounwind {
; GFX789-LABEL: i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -95,7 +95,7 @@ define i8 @i8_func_void() #0 {
ret i8 %val
}
-define zeroext i8 @i8_zeroext_func_void() #0 {
+define zeroext i8 @i8_zeroext_func_void() nounwind {
; GFX789-LABEL: i8_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -117,7 +117,7 @@ define zeroext i8 @i8_zeroext_func_void() #0 {
ret i8 %val
}
-define signext i8 @i8_signext_func_void() #0 {
+define signext i8 @i8_signext_func_void() nounwind {
; GFX789-LABEL: i8_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -139,7 +139,7 @@ define signext i8 @i8_signext_func_void() #0 {
ret i8 %val
}
-define i16 @i16_func_void() #0 {
+define i16 @i16_func_void() nounwind {
; GFX789-LABEL: i16_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -161,7 +161,7 @@ define i16 @i16_func_void() #0 {
ret i16 %val
}
-define zeroext i16 @i16_zeroext_func_void() #0 {
+define zeroext i16 @i16_zeroext_func_void() nounwind {
; GFX789-LABEL: i16_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -183,7 +183,7 @@ define zeroext i16 @i16_zeroext_func_void() #0 {
ret i16 %val
}
-define signext i16 @i16_signext_func_void() #0 {
+define signext i16 @i16_signext_func_void() nounwind {
; GFX789-LABEL: i16_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -205,7 +205,7 @@ define signext i16 @i16_signext_func_void() #0 {
ret i16 %val
}
-define i32 @i32_func_void() #0 {
+define i32 @i32_func_void() nounwind {
; GFX789-LABEL: i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -227,7 +227,7 @@ define i32 @i32_func_void() #0 {
ret i32 %val
}
-define i48 @i48_func_void() #0 {
+define i48 @i48_func_void() nounwind {
; GFX789-LABEL: i48_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -252,7 +252,7 @@ define i48 @i48_func_void() #0 {
ret i48 %val
}
-define zeroext i48 @i48_zeroext_func_void() #0 {
+define zeroext i48 @i48_zeroext_func_void() nounwind {
; GFX789-LABEL: i48_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -277,7 +277,7 @@ define zeroext i48 @i48_zeroext_func_void() #0 {
ret i48 %val
}
-define signext i48 @i48_signext_func_void() #0 {
+define signext i48 @i48_signext_func_void() nounwind {
; GFX789-LABEL: i48_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -302,7 +302,7 @@ define signext i48 @i48_signext_func_void() #0 {
ret i48 %val
}
-define i63 @i63_func_void(i63 %val) #0 {
+define i63 @i63_func_void(i63 %val) nounwind {
; GFX789-LABEL: i63_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -315,7 +315,7 @@ define i63 @i63_func_void(i63 %val) #0 {
ret i63 %val
}
-define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
+define zeroext i63 @i63_zeroext_func_void(i63 %val) nounwind {
; GFX789-LABEL: i63_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -330,7 +330,7 @@ define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
ret i63 %val
}
-define signext i63 @i63_signext_func_void(i63 %val) #0 {
+define signext i63 @i63_signext_func_void(i63 %val) nounwind {
; CI-LABEL: i63_signext_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -355,7 +355,7 @@ define signext i63 @i63_signext_func_void(i63 %val) #0 {
ret i63 %val
}
-define i64 @i64_func_void() #0 {
+define i64 @i64_func_void() nounwind {
; GFX789-LABEL: i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -377,7 +377,7 @@ define i64 @i64_func_void() #0 {
ret i64 %val
}
-define i65 @i65_func_void() #0 {
+define i65 @i65_func_void() nounwind {
; GFX789-LABEL: i65_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -402,7 +402,7 @@ define i65 @i65_func_void() #0 {
ret i65 %val
}
-define float @f32_func_void() #0 {
+define float @f32_func_void() nounwind {
; GFX789-LABEL: f32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -424,7 +424,7 @@ define float @f32_func_void() #0 {
ret float %val
}
-define double @f64_func_void() #0 {
+define double @f64_func_void() nounwind {
; GFX789-LABEL: f64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -446,7 +446,7 @@ define double @f64_func_void() #0 {
ret double %val
}
-define <2 x double> @v2f64_func_void() #0 {
+define <2 x double> @v2f64_func_void() nounwind {
; GFX789-LABEL: v2f64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -468,7 +468,7 @@ define <2 x double> @v2f64_func_void() #0 {
ret <2 x double> %val
}
-define <2 x i32> @v2i32_func_void() #0 {
+define <2 x i32> @v2i32_func_void() nounwind {
; GFX789-LABEL: v2i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -490,7 +490,7 @@ define <2 x i32> @v2i32_func_void() #0 {
ret <2 x i32> %val
}
-define <3 x i32> @v3i32_func_void() #0 {
+define <3 x i32> @v3i32_func_void() nounwind {
; GFX789-LABEL: v3i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -512,7 +512,7 @@ define <3 x i32> @v3i32_func_void() #0 {
ret <3 x i32> %val
}
-define <4 x i32> @v4i32_func_void() #0 {
+define <4 x i32> @v4i32_func_void() nounwind {
; GFX789-LABEL: v4i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -534,7 +534,7 @@ define <4 x i32> @v4i32_func_void() #0 {
ret <4 x i32> %val
}
-define <5 x i32> @v5i32_func_void() #0 {
+define <5 x i32> @v5i32_func_void() nounwind {
; GFX789-LABEL: v5i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -560,7 +560,7 @@ define <5 x i32> @v5i32_func_void() #0 {
ret <5 x i32> %val
}
-define <8 x i32> @v8i32_func_void() #0 {
+define <8 x i32> @v8i32_func_void() nounwind {
; GFX789-LABEL: v8i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -590,7 +590,7 @@ define <8 x i32> @v8i32_func_void() #0 {
ret <8 x i32> %val
}
-define <16 x i32> @v16i32_func_void() #0 {
+define <16 x i32> @v16i32_func_void() nounwind {
; GFX789-LABEL: v16i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -624,7 +624,7 @@ define <16 x i32> @v16i32_func_void() #0 {
ret <16 x i32> %val
}
-define <32 x i32> @v32i32_func_void() #0 {
+define <32 x i32> @v32i32_func_void() nounwind {
; GFX789-LABEL: v32i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -666,7 +666,7 @@ define <32 x i32> @v32i32_func_void() #0 {
ret <32 x i32> %val
}
-define <2 x i64> @v2i64_func_void() #0 {
+define <2 x i64> @v2i64_func_void() nounwind {
; GFX789-LABEL: v2i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -688,7 +688,7 @@ define <2 x i64> @v2i64_func_void() #0 {
ret <2 x i64> %val
}
-define <3 x i64> @v3i64_func_void() #0 {
+define <3 x i64> @v3i64_func_void() nounwind {
; GFX789-LABEL: v3i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -718,7 +718,7 @@ define <3 x i64> @v3i64_func_void() #0 {
ret <3 x i64> %val
}
-define <4 x i64> @v4i64_func_void() #0 {
+define <4 x i64> @v4i64_func_void() nounwind {
; GFX789-LABEL: v4i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -748,7 +748,7 @@ define <4 x i64> @v4i64_func_void() #0 {
ret <4 x i64> %val
}
-define <5 x i64> @v5i64_func_void() #0 {
+define <5 x i64> @v5i64_func_void() nounwind {
; GFX789-LABEL: v5i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -780,7 +780,7 @@ define <5 x i64> @v5i64_func_void() #0 {
ret <5 x i64> %val
}
-define <8 x i64> @v8i64_func_void() #0 {
+define <8 x i64> @v8i64_func_void() nounwind {
; GFX789-LABEL: v8i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -814,7 +814,7 @@ define <8 x i64> @v8i64_func_void() #0 {
ret <8 x i64> %val
}
-define <16 x i64> @v16i64_func_void() #0 {
+define <16 x i64> @v16i64_func_void() nounwind {
; GFX789-LABEL: v16i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -856,7 +856,7 @@ define <16 x i64> @v16i64_func_void() #0 {
ret <16 x i64> %val
}
-define <2 x i16> @v2i16_func_void() #0 {
+define <2 x i16> @v2i16_func_void() nounwind {
; CI-LABEL: v2i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -888,7 +888,7 @@ define <2 x i16> @v2i16_func_void() #0 {
ret <2 x i16> %val
}
-define <3 x i16> @v3i16_func_void() #0 {
+define <3 x i16> @v3i16_func_void() nounwind {
; CI-LABEL: v3i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -922,7 +922,7 @@ define <3 x i16> @v3i16_func_void() #0 {
ret <3 x i16> %val
}
-define <4 x i16> @v4i16_func_void() #0 {
+define <4 x i16> @v4i16_func_void() nounwind {
; CI-LABEL: v4i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -957,7 +957,7 @@ define <4 x i16> @v4i16_func_void() #0 {
ret <4 x i16> %val
}
-define <4 x half> @v4f16_func_void() #0 {
+define <4 x half> @v4f16_func_void() nounwind {
; CI-LABEL: v4f16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -996,7 +996,7 @@ define <4 x half> @v4f16_func_void() #0 {
; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
-define <5 x i16> @v5i16_func_void() #0 {
+define <5 x i16> @v5i16_func_void() nounwind {
; CI-LABEL: v5i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1040,7 +1040,7 @@ define <5 x i16> @v5i16_func_void() #0 {
ret <5 x i16> %val
}
-define <8 x i16> @v8i16_func_void() #0 {
+define <8 x i16> @v8i16_func_void() nounwind {
; CI-LABEL: v8i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1086,7 +1086,7 @@ define <8 x i16> @v8i16_func_void() #0 {
ret <8 x i16> %val
}
-define <16 x i16> @v16i16_func_void() #0 {
+define <16 x i16> @v16i16_func_void() nounwind {
; CI-LABEL: v16i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1146,7 +1146,7 @@ define <16 x i16> @v16i16_func_void() #0 {
}
; FIXME: Should pack
-define <16 x i8> @v16i8_func_void() #0 {
+define <16 x i8> @v16i8_func_void() nounwind {
; GFX789-LABEL: v16i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1208,7 +1208,7 @@ define <16 x i8> @v16i8_func_void() #0 {
}
; FIXME: Should pack
-define <4 x i8> @v4i8_func_void() #0 {
+define <4 x i8> @v4i8_func_void() nounwind {
; GFX789-LABEL: v4i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1241,7 +1241,7 @@ define <4 x i8> @v4i8_func_void() #0 {
ret <4 x i8> %val
}
-define {i8, i32} @struct_i8_i32_func_void() #0 {
+define {i8, i32} @struct_i8_i32_func_void() nounwind {
; GFX789-LABEL: struct_i8_i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1266,7 +1266,7 @@ define {i8, i32} @struct_i8_i32_func_void() #0 {
ret { i8, i32 } %val
}
-define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 {
+define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) nounwind {
; GFX789-LABEL: void_func_sret_struct_i8_i32:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1306,7 +1306,7 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar
; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.
-define <33 x i32> @v33i32_func_void() #0 {
+define <33 x i32> @v33i32_func_void() nounwind {
; CI-LABEL: v33i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1595,7 +1595,7 @@ define <33 x i32> @v33i32_func_void() #0 {
ret <33 x i32> %val
}
-define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
+define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() nounwind {
; CI-LABEL: struct_v32i32_i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1884,7 +1884,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
ret { <32 x i32>, i32 }%val
}
-define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
+define { i32, <32 x i32> } @struct_i32_v32i32_func_void() nounwind {
; CI-LABEL: struct_i32_v32i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2177,7 +2177,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
}
; Make sure the last struct component is returned in v3, not v4.
-define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
+define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() nounwind {
; CI-LABEL: v3i32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2236,7 +2236,7 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
ret { <3 x i32>, i32 } %insert.4
}
-define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
+define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() nounwind {
; CI-LABEL: v3f32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2295,7 +2295,7 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
ret { <3 x float>, i32 } %insert.4
}
-define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
+define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) nounwind {
; CI-LABEL: void_func_sret_max_known_zero_bits:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2354,7 +2354,7 @@ define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0)
ret void
}
-define bfloat @bf16_func_void() #0 {
+define bfloat @bf16_func_void() nounwind {
; CI-LABEL: bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2386,7 +2386,7 @@ define bfloat @bf16_func_void() #0 {
ret bfloat %val
}
-define <2 x bfloat> @v2bf16_func_void() #0 {
+define <2 x bfloat> @v2bf16_func_void() nounwind {
; CI-LABEL: v2bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2419,7 +2419,7 @@ define <2 x bfloat> @v2bf16_func_void() #0 {
ret <2 x bfloat> %val
}
-define <3 x bfloat> @v3bf16_func_void() #0 {
+define <3 x bfloat> @v3bf16_func_void() nounwind {
; CI-LABEL: v3bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2453,7 +2453,7 @@ define <3 x bfloat> @v3bf16_func_void() #0 {
ret <3 x bfloat> %val
}
-define <4 x bfloat> @v4bf16_func_void() #0 {
+define <4 x bfloat> @v4bf16_func_void() nounwind {
; CI-LABEL: v4bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2488,7 +2488,7 @@ define <4 x bfloat> @v4bf16_func_void() #0 {
ret <4 x bfloat> %val
}
-define <6 x bfloat> @v6bf16_func_void() #0 {
+define <6 x bfloat> @v6bf16_func_void() nounwind {
; CI-LABEL: v6bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2525,7 +2525,7 @@ define <6 x bfloat> @v6bf16_func_void() #0 {
ret <6 x bfloat> %val
}
-define <8 x bfloat> @v8bf16_func_void() #0 {
+define <8 x bfloat> @v8bf16_func_void() nounwind {
; CI-LABEL: v8bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2564,7 +2564,7 @@ define <8 x bfloat> @v8bf16_func_void() #0 {
ret <8 x bfloat> %val
}
-define <16 x bfloat> @v16bf16_func_void() #0 {
+define <16 x bfloat> @v16bf16_func_void() nounwind {
; CI-LABEL: v16bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2617,7 +2617,7 @@ define <16 x bfloat> @v16bf16_func_void() #0 {
ret <16 x bfloat> %val
}
-define <32 x bfloat> @v32bf16_func_void() #0 {
+define <32 x bfloat> @v32bf16_func_void() nounwind {
; CI-LABEL: v32bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2697,5 +2697,3 @@ define <32 x bfloat> @v32bf16_func_void() #0 {
%val = load <32 x bfloat>, ptr addrspace(1) undef
ret <32 x bfloat> %val
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index 1a9334706cb927..80de1514618729 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -79,7 +79,7 @@ define amdgpu_kernel void @gds_global_align(ptr addrspace(1) %out) {
ret void
}
-define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) "amdgpu-gds-size"="1024" {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, 5
@@ -103,7 +103,7 @@ define amdgpu_kernel void @gds_global_align_plus_attr(ptr addrspace(1) %out) #0
@small.gds = internal addrspace(2) global i8 undef, align 1
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4
-define amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace(2) %gds.arg) #0 {
+define amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace(2) %gds.arg) "amdgpu-gds-size"="1024" {
; GCN-LABEL: gds_extern_align:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[0:1], 0x8
@@ -124,5 +124,3 @@ define amdgpu_kernel void @gds_extern_align(ptr addrspace(1) %out, ptr addrspace
%val0 = atomicrmw add ptr addrspace(2) %gep.gds0, i32 5 acq_rel
ret void
}
-
-attributes #0 = { "amdgpu-gds-size"="1024" }
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 8d44330b1b9733..cb403a6776c2a6 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -8,7 +8,7 @@
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -17,7 +17,7 @@ define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspa
; FUNC-LABEL: {{^}}atomic_add_ret_gds_const_offset:
; GCN: s_movk_i32 m0, 0x80
; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 gds
-define amdgpu_kernel void @atomic_add_ret_gds_const_offset(ptr addrspace(1) %out, ptr addrspace(2) %gds) #0 {
+define amdgpu_kernel void @atomic_add_ret_gds_const_offset(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="128" {
%gep = getelementptr i32, ptr addrspace(2) %gds, i32 5
%val = atomicrmw volatile add ptr addrspace(2) %gep, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
@@ -28,7 +28,7 @@ define amdgpu_kernel void @atomic_add_ret_gds_const_offset(ptr addrspace(1) %out
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_sub_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_sub_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw sub ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -38,7 +38,7 @@ define amdgpu_kernel void @atomic_sub_ret_gds(ptr addrspace(1) %out, ptr addrspa
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_and_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_and_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw and ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -48,7 +48,7 @@ define amdgpu_kernel void @atomic_and_ret_gds(ptr addrspace(1) %out, ptr addrspa
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_or_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_or_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw or ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -58,7 +58,7 @@ define amdgpu_kernel void @atomic_or_ret_gds(ptr addrspace(1) %out, ptr addrspac
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_xor_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_xor_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw xor ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -68,7 +68,7 @@ define amdgpu_kernel void @atomic_xor_ret_gds(ptr addrspace(1) %out, ptr addrspa
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_umin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_umin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw umin ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -78,7 +78,7 @@ define amdgpu_kernel void @atomic_umin_ret_gds(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_umax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_umax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw umax ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -88,7 +88,7 @@ define amdgpu_kernel void @atomic_umax_ret_gds(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_imin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_imin_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw min ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -98,7 +98,7 @@ define amdgpu_kernel void @atomic_imin_ret_gds(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_imax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_imax_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw max ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -108,7 +108,7 @@ define amdgpu_kernel void @atomic_imax_ret_gds(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_xchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_xchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = atomicrmw xchg ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
@@ -118,12 +118,9 @@ define amdgpu_kernel void @atomic_xchg_ret_gds(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
; GCN-DAG: s_movk_i32 m0, 0x1000
; GCN: ds_cmpst_rtn_b32 v{{[0-9]+}}, v[[OFF:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} gds
-define amdgpu_kernel void @atomic_cmpxchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+define amdgpu_kernel void @atomic_cmpxchg_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) nounwind "amdgpu-gds-size"="4096" {
%val = cmpxchg ptr addrspace(2) %gds, i32 0, i32 1 acquire acquire
%x = extractvalue { i32, i1 } %val, 0
store i32 %x, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind "amdgpu-gds-size"="128" }
-attributes #1 = { nounwind "amdgpu-gds-size"="4096" }
diff --git a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
index 944dcda5eba6f2..805e271d463f6b 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr nocapture, double) #8
+declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr nocapture, double) argmemonly mustprogress nounwind willreturn "target-cpu"="gfx90a"
define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, ptr addrspace(1) %b, double %c) {
; CHECK-LABEL: IllegalGEPConst:
@@ -23,8 +23,6 @@ entry:
%i.2 = sext i32 %i to i64
%i.3 = getelementptr inbounds double, ptr addrspace(1) %b, i64 %i.2
%i.4 = addrspacecast ptr addrspace(1) %i.3 to ptr
- %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %i.4, double %c) #8
+ %i.5 = tail call contract double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %i.4, double %c) argmemonly mustprogress nounwind willreturn "target-cpu"="gfx90a"
ret void
}
-
-attributes #8 = { argmemonly mustprogress nounwind willreturn "target-cpu"="gfx90a" }
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 3e1db5fb4e1dc8..2679cac03265e0 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -4,125 +4,125 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10-SCRATCH %s
-declare hidden amdgpu_gfx void @external_void_func_i1(i1) #0
-declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) #0
-declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) #0
+declare hidden amdgpu_gfx void @external_void_func_i1(i1) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i1_signext(i1 signext) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext) nounwind
-declare hidden amdgpu_gfx void @external_void_func_i8(i8) #0
-declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) #0
-declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) #0
-declare hidden amdgpu_gfx void @external_void_func_v2i8(<2 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i8(<3 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i8(<4 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v5i8(<5 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v8i8(<8 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v16i8(<8 x i8>) #0
-declare hidden amdgpu_gfx void @external_void_func_v32i8(<32 x i8>) #0
+declare hidden amdgpu_gfx void @external_void_func_i8(i8) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i8_signext(i8 signext) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2i8(<2 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i8(<3 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i8(<4 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v5i8(<5 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v8i8(<8 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v16i8(<8 x i8>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v32i8(<32 x i8>) nounwind
-declare hidden amdgpu_gfx i8 @external_void_func_i8_ret(i8) #0
-declare hidden amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8>) #0
-declare hidden amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8>) #0
-declare hidden amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8>) #0
-declare hidden amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8>) #0
-declare hidden amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8>) #0
-declare hidden amdgpu_gfx <32 x i8> @external_void_func_v32i8_ret(<32 x i8>) #0
+declare hidden amdgpu_gfx i8 @external_void_func_i8_ret(i8) nounwind
+declare hidden amdgpu_gfx <2 x i8> @external_void_func_v2i8_ret(<2 x i8>) nounwind
+declare hidden amdgpu_gfx <3 x i8> @external_void_func_v3i8_ret(<3 x i8>) nounwind
+declare hidden amdgpu_gfx <4 x i8> @external_void_func_v4i8_ret(<4 x i8>) nounwind
+declare hidden amdgpu_gfx <5 x i8> @external_void_func_v5i8_ret(<5 x i8>) nounwind
+declare hidden amdgpu_gfx <8 x i8> @external_void_func_v8i8_ret(<8 x i8>) nounwind
+declare hidden amdgpu_gfx <32 x i8> @external_void_func_v32i8_ret(<32 x i8>) nounwind
-declare hidden amdgpu_gfx void @external_void_func_i16(i16) #0
-declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) #0
-declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) #0
+declare hidden amdgpu_gfx void @external_void_func_i16(i16) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i16_signext(i16 signext) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext) nounwind
-declare hidden amdgpu_gfx void @external_void_func_i32(i32) #0
-declare hidden amdgpu_gfx void @external_void_func_i64(i64) #0
-declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) #0
+declare hidden amdgpu_gfx void @external_void_func_i32(i32) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i64(i64) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2i64(<2 x i64>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i64(<3 x i64>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i64(<4 x i64>) nounwind
-declare hidden amdgpu_gfx void @external_void_func_f16(half) #0
-declare hidden amdgpu_gfx void @external_void_func_f32(float) #0
-declare hidden amdgpu_gfx void @external_void_func_f64(double) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) #0
-declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) #0
+declare hidden amdgpu_gfx void @external_void_func_f16(half) nounwind
+declare hidden amdgpu_gfx void @external_void_func_f32(float) nounwind
+declare hidden amdgpu_gfx void @external_void_func_f64(double) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f32(<2 x float>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f64(<2 x double>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f32(<3 x float>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f64(<3 x double>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v5f32(<5 x float>) nounwind
-declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) #0
-declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) #0
+declare hidden amdgpu_gfx void @external_void_func_v2i16(<2 x i16>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f16(<2 x half>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i16(<3 x i16>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f16(<3 x half>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i16(<4 x i16>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4f16(<4 x half>) nounwind
-declare hidden amdgpu_gfx void @external_void_func_bf16(bfloat) #0
-declare hidden amdgpu_gfx void @external_void_func_v1bf16(<1 x bfloat>) #0
-declare hidden amdgpu_gfx void @external_void_func_v2bf16(<2 x bfloat>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3bf16(<3 x bfloat>) #0
-declare hidden amdgpu_gfx void @external_void_func_v4bf16(<4 x bfloat>) #0
-declare hidden amdgpu_gfx void @external_void_func_v8bf16(<8 x bfloat>) #0
-declare hidden amdgpu_gfx void @external_void_func_v16bf16(<16 x bfloat>) #0
+declare hidden amdgpu_gfx void @external_void_func_bf16(bfloat) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v1bf16(<1 x bfloat>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2bf16(<2 x bfloat>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3bf16(<3 x bfloat>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4bf16(<4 x bfloat>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v8bf16(<8 x bfloat>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v16bf16(<16 x bfloat>) nounwind
-declare hidden amdgpu_gfx void @external_void_func_bf16_inreg(bfloat inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v1bf16_inreg(<1 x bfloat> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3bf16_inreg(<3 x bfloat> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v4bf16_inreg(<4 x bfloat> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v8bf16_inreg(<8 x bfloat> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v16bf16_inreg(<16 x bfloat> inreg) #0
+declare hidden amdgpu_gfx void @external_void_func_bf16_inreg(bfloat inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v1bf16_inreg(<1 x bfloat> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3bf16_inreg(<3 x bfloat> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4bf16_inreg(<4 x bfloat> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v8bf16_inreg(<8 x bfloat> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v16bf16_inreg(<16 x bfloat> inreg) nounwind
-declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) #0
-declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
+declare hidden amdgpu_gfx void @external_void_func_v2i32(<2 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i32(<3 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32>, i32) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i32(<4 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v5i32(<5 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v8i32(<8 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v16i32(<16 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v32i32(<32 x i32>) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v32i32_i32(<32 x i32>, i32) nounwind
-declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) #0
+declare hidden amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg) nounwind
-declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) #0
+declare hidden amdgpu_gfx void @external_void_func_f16_inreg(half inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_f32_inreg(float inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_f64_inreg(double inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg) nounwind
-declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
+declare hidden amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4f16_inreg(<4 x half> inreg) nounwind
-declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) #0
-declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) #0
+declare hidden amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg, i32 inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg) nounwind
+declare hidden amdgpu_gfx void @external_void_func_v32i32_i32_inreg(<32 x i32> inreg, i32 inreg) nounwind
; return value and argument
-declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) #0
+declare hidden amdgpu_gfx i32 @external_i32_func_i32(i32) nounwind
; Structs
-declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) #0
-declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
-declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
+declare hidden amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 }) nounwind
+declare hidden amdgpu_gfx void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) nounwind
+declare hidden amdgpu_gfx void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) nounwind
-define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i1_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_i1_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -243,7 +243,7 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i1_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -372,7 +372,7 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -501,7 +501,7 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -618,7 +618,7 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -740,7 +740,7 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -862,7 +862,7 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i16_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -979,7 +979,7 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i16_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1101,7 +1101,7 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1223,7 +1223,7 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1340,7 +1340,7 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i64_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_i64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1460,7 +1460,7 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i64() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1586,7 +1586,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1713,7 +1713,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i64() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1847,7 +1847,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i64() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1987,7 +1987,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f16_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_f16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2104,7 +2104,7 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2221,7 +2221,7 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2341,7 +2341,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2465,7 +2465,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2596,7 +2596,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f64_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2716,7 +2716,7 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2843,7 +2843,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2977,7 +2977,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3111,7 +3111,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3248,7 +3248,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3389,7 +3389,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3538,7 +3538,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3698,7 +3698,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3985,7 +3985,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4137,7 +4137,7 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4307,7 +4307,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4494,7 +4494,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4689,7 +4689,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4904,7 +4904,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5141,7 +5141,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 {
}
-define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i8_ret:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5668,7 +5668,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 {
-define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i16() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5786,7 +5786,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i16() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5904,7 +5904,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f16() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6022,7 +6022,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6142,7 +6142,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6263,7 +6263,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i16() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6381,7 +6381,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6502,7 +6502,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f16() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6620,7 +6620,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6738,7 +6738,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6858,7 +6858,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -6982,7 +6982,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7109,7 +7109,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7227,7 +7227,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7354,7 +7354,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7485,7 +7485,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7623,7 +7623,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7764,7 +7764,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v16i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_v16i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7910,7 +7910,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8073,7 +8073,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8248,7 +8248,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_external_i32_func_i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8400,7 +8400,7 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8538,7 +8538,7 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
+define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() nounwind {
; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8676,7 +8676,7 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -8859,7 +8859,7 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v16i8() nounwind {
; GFX9-LABEL: test_call_external_void_func_v16i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9065,7 +9065,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 {
ret void
}
-define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
+define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) nounwind {
; GFX9-LABEL: tail_call_byval_align16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9437,7 +9437,7 @@ entry:
}
; inreg arguments are put in sgprs
-define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9558,7 +9558,7 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9675,7 +9675,7 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9792,7 +9792,7 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -9909,7 +9909,7 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10030,7 +10030,7 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10162,7 +10162,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10291,7 +10291,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10449,7 +10449,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10630,7 +10630,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10747,7 +10747,7 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10864,7 +10864,7 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -10985,7 +10985,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11110,7 +11110,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11251,7 +11251,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11372,7 +11372,7 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11501,7 +11501,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11654,7 +11654,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11772,7 +11772,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -11896,7 +11896,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12020,7 +12020,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12141,7 +12141,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12262,7 +12262,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12386,7 +12386,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12507,7 +12507,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12625,7 +12625,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12749,7 +12749,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12870,7 +12870,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12995,7 +12995,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13124,7 +13124,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13252,7 +13252,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13381,7 +13381,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13522,7 +13522,7 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13699,7 +13699,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -13876,7 +13876,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -14133,7 +14133,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -14524,7 +14524,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) nounwind {
; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -14935,7 +14935,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
ret void
}
-define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
+define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) nounwind {
; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -15067,7 +15067,7 @@ entry:
ret void
}
-define amdgpu_gfx void @stack_12xv3i32() #0 {
+define amdgpu_gfx void @stack_12xv3i32() nounwind {
; GFX9-LABEL: stack_12xv3i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -15329,7 +15329,7 @@ entry:
ret void
}
-define amdgpu_gfx void @stack_8xv5i32() #0 {
+define amdgpu_gfx void @stack_8xv5i32() nounwind {
; GFX9-LABEL: stack_8xv5i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -15613,7 +15613,7 @@ entry:
ret void
}
-define amdgpu_gfx void @stack_8xv5f32() #0 {
+define amdgpu_gfx void @stack_8xv5f32() nounwind {
; GFX9-LABEL: stack_8xv5f32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -15903,7 +15903,7 @@ entry:
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16017,7 +16017,7 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v1bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16131,7 +16131,7 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16245,7 +16245,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16359,7 +16359,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v4bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16473,7 +16473,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v8bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16587,7 +16587,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 {
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v16bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16701,7 +16701,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16817,7 +16817,7 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg)
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v1bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -16933,7 +16933,7 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17049,7 +17049,7 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v3bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17167,7 +17167,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v4bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17285,7 +17285,7 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v8bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17407,7 +17407,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre
ret void
}
-define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> inreg %arg) #0 {
+define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> inreg %arg) nounwind {
; GFX9-LABEL: test_call_external_void_func_v16bf16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -17553,15 +17553,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in
ret void
}
-declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
-declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) #0
+declare hidden amdgpu_gfx void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) nounwind
+declare hidden amdgpu_gfx void @stack_passed_f64_arg(<32 x i32>, double) nounwind
declare hidden amdgpu_gfx void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
- <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
+ <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) nounwind
declare hidden amdgpu_gfx void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
- <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
+ <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) nounwind
declare hidden amdgpu_gfx void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
- <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
+ <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) nounwind
declare hidden amdgpu_gfx void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
- <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
+ <5 x float>, <5 x float>, <5 x float>, <5 x float>) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
index a14e3d5673f829..dde8557702fc2f 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
@@ -3,9 +3,9 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
-declare hidden amdgpu_gfx void @external_void_func_void() #0
+declare hidden amdgpu_gfx void @external_void_func_void() nounwind
-define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
+define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() nounwind {
; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -108,12 +108,12 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_void()
- call void asm sideeffect "", ""() #0
+ call void asm sideeffect "", ""() nounwind
call amdgpu_gfx void @external_void_func_void()
ret void
}
-define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
+define amdgpu_gfx void @void_func_void_clobber_s28_s29() nounwind noinline {
; GFX9-LABEL: void_func_void_clobber_s28_s29:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -194,12 +194,12 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
- call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
+ call void asm sideeffect "; clobber", "~{s[30:31]}"() nounwind
+ call void asm sideeffect "; clobber", "~{s[28:29]}"() nounwind
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -315,7 +315,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1)
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -433,7 +433,7 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1)
}
-define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_void_func_void_preserves_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -550,7 +550,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1)
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_void_func_void_preserves_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -667,7 +667,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1)
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
+define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) nounwind {
; GFX9-LABEL: test_call_void_func_void_preserves_v40:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -777,7 +777,7 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1)
ret void
}
-define hidden void @void_func_void_clobber_s33() #1 {
+define hidden void @void_func_void_clobber_s33() nounwind noinline {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -831,11 +831,11 @@ define hidden void @void_func_void_clobber_s33() #1 {
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void asm sideeffect "; clobber", "~{s33}"() #0
+ call void asm sideeffect "; clobber", "~{s33}"() nounwind
ret void
}
-define hidden void @void_func_void_clobber_s34() #1 {
+define hidden void @void_func_void_clobber_s34() nounwind noinline {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -889,11 +889,11 @@ define hidden void @void_func_void_clobber_s34() #1 {
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- call void asm sideeffect "; clobber", "~{s34}"() #0
+ call void asm sideeffect "; clobber", "~{s34}"() nounwind
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
+define amdgpu_gfx void @test_call_void_func_void_clobber_s33() nounwind {
; GFX9-LABEL: test_call_void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -978,7 +978,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
ret void
}
-define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
+define amdgpu_gfx void @test_call_void_func_void_clobber_s34() nounwind {
; GFX9-LABEL: test_call_void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1063,7 +1063,7 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
ret void
}
-define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
+define amdgpu_gfx void @callee_saved_sgpr_kernel() nounwind noinline {
; GFX9-LABEL: callee_saved_sgpr_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1171,13 +1171,13 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
call amdgpu_gfx void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
ret void
}
-define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
+define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() nounwind noinline {
; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1311,13 +1311,10 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
- %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
+ %s40 = call i32 asm sideeffect "; def s40", "={s40}"() nounwind
+ %v32 = call i32 asm sideeffect "; def v32", "={v32}"() nounwind
call amdgpu_gfx void @external_void_func_void()
- call void asm sideeffect "; use $0", "s"(i32 %s40) #0
- call void asm sideeffect "; use $0", "v"(i32 %v32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %s40) nounwind
+ call void asm sideeffect "; use $0", "v"(i32 %v32) nounwind
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index c1d682689903ad..288c8a7d6dd527 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s
-define amdgpu_gfx i1 @return_i1() #0 {
+define amdgpu_gfx i1 @return_i1() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_i1:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -19,7 +19,7 @@ entry:
ret i1 1
}
-define amdgpu_gfx void @call_i1() #0 {
+define amdgpu_gfx void @call_i1() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_i1:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -99,7 +99,7 @@ entry:
ret void
}
-define amdgpu_gfx i16 @return_i16() #0 {
+define amdgpu_gfx i16 @return_i16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_i16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -115,7 +115,7 @@ entry:
ret i16 10
}
-define amdgpu_gfx void @call_i16() #0 {
+define amdgpu_gfx void @call_i16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_i16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -195,7 +195,7 @@ entry:
ret void
}
-define amdgpu_gfx <2 x i16> @return_2xi16() #0 {
+define amdgpu_gfx <2 x i16> @return_2xi16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_2xi16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,7 +211,7 @@ entry:
ret <2 x i16> <i16 1, i16 2>
}
-define amdgpu_gfx void @call_2xi16() #0 {
+define amdgpu_gfx void @call_2xi16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_2xi16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -291,7 +291,7 @@ entry:
ret void
}
-define amdgpu_gfx <3 x i16> @return_3xi16() #0 {
+define amdgpu_gfx <3 x i16> @return_3xi16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_3xi16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -315,7 +315,7 @@ entry:
ret <3 x i16> <i16 1, i16 2, i16 3>
}
-define amdgpu_gfx void @call_3xi16() #0 {
+define amdgpu_gfx void @call_3xi16() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_3xi16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -397,7 +397,7 @@ entry:
; Check that return values that overlap CSRs are correctly handled
-define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
+define amdgpu_gfx <100 x i32> @return_100xi32() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_100xi32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -665,7 +665,7 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
ret <100 x i32> zeroinitializer
}
-define amdgpu_gfx void @call_100xi32() #0 {
+define amdgpu_gfx void @call_100xi32() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_100xi32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -941,7 +941,7 @@ entry:
; Check that return values that do not fit in registers do not crash
-define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
+define amdgpu_gfx <512 x i32> @return_512xi32() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: return_512xi32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2244,7 +2244,7 @@ entry:
ret <512 x i32> zeroinitializer
}
-define amdgpu_gfx void @call_512xi32() #0 {
+define amdgpu_gfx void @call_512xi32() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" {
; GFX9-LABEL: call_512xi32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2333,7 +2333,7 @@ entry:
; Check that return values larger than VGPR limit are handled correctly
-define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
+define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) nounwind "amdgpu-num-vgpr"="64" {
; GFX9-LABEL: return_72xi32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2757,7 +2757,7 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
ret <72 x i32> %val
}
-define amdgpu_gfx void @call_72xi32() #1 {
+define amdgpu_gfx void @call_72xi32() nounwind "amdgpu-num-vgpr"="64" {
; GFX9-LABEL: call_72xi32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3505,7 +3505,5 @@ entry:
}
; Ensure all VGPRs are available
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" }
; Limit to 64 VGPRs
-attributes #1 = { nounwind "amdgpu-num-vgpr"="64" }
diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index 81239e841e097e..3d5c997ab4fc6c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -108,7 +108,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 2
define amdgpu_kernel void @queue_ptr() {
- %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+ %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable willreturn
%load = load volatile i8, ptr addrspace(4) %queue.ptr
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %id, ptr addrspace(1) undef
@@ -182,16 +182,14 @@ define amdgpu_kernel void @all_inputs() {
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable willreturn
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
+declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable willreturn
+declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone speculatable willreturn
+declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable willreturn
+declare i64 @llvm.amdgcn.dispatch.id() nounwind readnone speculatable willreturn
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
index 2f20ca8301bd9d..763f98e5a4146a 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
@@ -5,7 +5,7 @@
; GFX9-DAG: buffer_load_format_d16_xyzw v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], 0 idxen ; encoding:
; GFX908-DAG: v_mfma_i32_4x4x4i8 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9:]+}}] ; encoding: [{{0x..,0x0.,}}
; GFX90A-DAG: v_mfma_i32_4x4x4i8 a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9:]+}}] ; encoding: [{{0x..,0x8.,}}
-define amdgpu_kernel void @test(<4 x i32> %x) #0 {
+define amdgpu_kernel void @test(<4 x i32> %x) nounwind readnone speculatable willreturn "amdgpu-flat-work-group-size"="1,256" {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%.x.int = bitcast <4 x i32> %x to i128
%.x.ptr = inttoptr i128 %.x.int to ptr addrspace(8)
@@ -18,11 +18,7 @@ define amdgpu_kernel void @test(<4 x i32> %x) #0 {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32 immarg, i32 immarg, i32 immarg) #2
-
-attributes #0 = { nounwind readnone speculatable willreturn "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind memory(argmem: read) willreturn }
-attributes #2 = { convergent nounwind readnone willreturn }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn "amdgpu-flat-work-group-size"="1,256"
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind memory(argmem: read) willreturn
+declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind memory(argmem: read) willreturn
+declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32 immarg, i32 immarg, i32 immarg) convergent nounwind readnone willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index ee0910b21f0245..e54a754efe0e0c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -117,7 +117,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr ad
ret void
}
-define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908_GFX11_GFX12-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw
; GFX908_GFX11_GFX12: bb.0 (%ir-block.0):
; GFX908_GFX11_GFX12-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -145,7 +145,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
ret void
}
-define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX908-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
@@ -327,7 +327,5 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
ret void
}
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
-
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 3454e9d1019e55..2e0774bff7d347 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -124,7 +124,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addr
ret float %ret
}
-define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 {
+define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -154,7 +154,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %p
ret float %ret
}
-define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
+define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX11-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw
; GFX11: bb.0 (%ir-block.0):
; GFX11-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
@@ -238,5 +238,3 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
index 9d8b987d2ba68c..d1bd63ebf88d72 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
@@ -170,7 +170,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr add
ret double %ret
}
-define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -189,7 +189,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1)
ret void
}
-define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 {
+define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -212,7 +212,7 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %
ret double %ret
}
-define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
+define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
@@ -231,7 +231,7 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspa
ret void
}
-define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 {
+define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) "amdgpu-unsafe-fp-atomics"="true" {
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw
; GFX90A_GFX940: bb.0 (%ir-block.0):
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
@@ -256,5 +256,3 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-
-attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll
index 0612383c3f90b1..f042ddf1b3895c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll
@@ -8,7 +8,7 @@
; DISASSEMBLY-VI: .long 0xdd348000 // {{[0-9A-Z]+}}: DD348000
; DISASSEMBLY-VI-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc // {{[0-9A-Z]+}}: 00000100
-define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" {
; GCN-LABEL: global_atomic_fadd_noret_f32_wrong_subtarget:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b64 s[2:3], exec
@@ -32,5 +32,3 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addr
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 syncscope("agent") seq_cst
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
index 5889de7faf3e5e..bade6b6037103c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-define amdgpu_kernel void @global_atomic_fadd_ret_f32(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_ret_f32(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[4:5], exec
@@ -219,7 +219,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(ptr addrspace(1) %ptr) #0
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_f32_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[4:5], exec
@@ -407,7 +407,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(ptr addrspace(1) %ptr
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_noret_f32(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_noret_f32(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_noret_f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[2:3], exec
@@ -539,7 +539,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32(ptr addrspace(1) %ptr) #
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(ptr addrspace(1) %ptr) "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_noret_f32_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[2:3], exec
@@ -671,7 +671,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(ptr addrspace(1) %p
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_f32_agent:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[4:5], exec
@@ -855,7 +855,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_agent(ptr addrspace(1) %pt
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_f32_system:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[4:5], exec
@@ -1069,7 +1069,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(ptr addrspace(1) %p
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" {
; GCN-LABEL: global_atomic_fadd_ret_f32_wrong_subtarget:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b64 s[4:5], exec
@@ -1156,7 +1156,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(ptr addrsp
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" {
; GCN-LABEL: global_atomic_fadd_noret_f32_wrong_subtarget:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b64 s[2:3], exec
@@ -1370,7 +1370,7 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe(ptr addrspace(1) %p
ret void
}
-define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #0 {
+define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: infer_as_before_atomic:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b64 s[2:3], exec
@@ -1501,7 +1501,7 @@ define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #0 {
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_bf16_agent:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
@@ -1716,7 +1716,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_agent(ptr addrspace(1) %p
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX900-LABEL: global_atomic_fadd_ret_bf16_system:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24
@@ -1932,7 +1932,3 @@ define amdgpu_kernel void @global_atomic_fadd_ret_bf16_system(ptr addrspace(1) %
store bfloat %result, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
index 6b4a6381d954cb..b74b22fcb2822b 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
@@ -2732,5 +2732,3 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%unused = atomicrmw umin ptr addrspace(1) %gep1, i64 %data seq_cst
ret void
}
-
-attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
index 79de55eb63bf81..7e5aa2d91aea18 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
@@ -55,6 +55,4 @@ define amdgpu_ps void @global_csub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
ret void
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
index 0147084a6996f3..b2b0eb5e53172b 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
@@ -53,7 +53,5 @@ define amdgpu_ps void @global_fadd_saddr_v2f16_nortn_neg128(ptr addrspace(1) inr
ret void
}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1(ptr addrspace(1) nocapture, float) #0
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1(ptr addrspace(1) nocapture, <2 x half>) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1(ptr addrspace(1) nocapture, float) argmemonly nounwind willreturn
+declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1(ptr addrspace(1) nocapture, <2 x half>) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
index a3c8bb141fd93b..b95e6ed06f883a 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
@@ -3198,5 +3198,3 @@ define amdgpu_ps void @global_dec_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
%unused = atomicrmw udec_wrap ptr addrspace(1) %gep1, i64 %data syncscope("agent") monotonic
ret void
}
-
-attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
index b38758bae537d8..4e1390531a947c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
@@ -4,7 +4,7 @@
; GCN: flat_load_dword
; GCN: flat_load_dword
; GCN: flat_store_dword
-define void @unknown_memdep_analysis(ptr addrspace(1) nocapture readonly %arg, float %arg1) #0 {
+define void @unknown_memdep_analysis(ptr addrspace(1) nocapture readonly %arg, float %arg1) nounwind {
bb:
%tmp53 = load float, ptr addrspace(1) undef, align 4
%tmp54 = getelementptr inbounds float, ptr addrspace(1) %arg, i32 31
@@ -14,7 +14,4 @@ bb:
ret void
}
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
index 538ef42121b83b..df5e877233f62a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
@@ -6,7 +6,7 @@
; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
-define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
@@ -43,7 +43,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -144,7 +144,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_sco
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
@@ -213,7 +213,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_un
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -314,7 +314,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_un
ret float %result
}
-define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
+define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
@@ -383,7 +383,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str
ret float %result
}
-define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
+define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -484,7 +484,7 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
ret float %result
}
-define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
+define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
@@ -517,7 +517,7 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
+define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -618,7 +618,7 @@ define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
+define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
@@ -679,7 +679,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1{
+define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -780,7 +780,7 @@ define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
@@ -849,7 +849,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_st
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
+define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
@@ -950,7 +950,7 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
+define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -959,7 +959,7 @@ define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -968,7 +968,7 @@ define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
+define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -977,7 +977,7 @@ define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_un
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
+define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -986,7 +986,7 @@ define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_un
ret float %result
}
-define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
+define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) strictfp {
; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -995,7 +995,7 @@ define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_str
ret float %result
}
-define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
+define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) strictfp {
; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1004,7 +1004,7 @@ define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_str
ret float %result
}
-define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
+define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1013,7 +1013,7 @@ define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr
ret float %result
}
-define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1022,7 +1022,7 @@ define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr
ret float %result
}
-define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
+define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1031,7 +1031,7 @@ define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
+define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1040,7 +1040,7 @@ define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_uns
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
+define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) strictfp {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1049,7 +1049,7 @@ define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_st
ret float %result
}
-define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
+define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) strictfp {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
@@ -1057,7 +1057,3 @@ define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_st
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
index f954560d0f5ca9..a484f894a02d1e 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR %s
-define amdgpu_kernel void @uniform_value(ptr addrspace(1) , ptr addrspace(1) %val) #0 {
+define amdgpu_kernel void @uniform_value(ptr addrspace(1) , ptr addrspace(1) %val) "target-cpu"="gfx906" {
; IR-LABEL: @uniform_value(
; IR-NEXT: entry:
; IR-NEXT: [[UNIFORM_VALUE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -50,7 +50,7 @@ entry:
ret void
}
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, ptr addrspace(1) %val) #0 {
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, ptr addrspace(1) %val) "target-cpu"="gfx906" {
; IR-LABEL: @divergent_value(
; IR-NEXT: entry:
; IR-NEXT: [[DIVERGENT_VALUE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -111,11 +111,8 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workgroup.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
-attributes #0 = {"target-cpu"="gfx906"}
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none)}
-
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
index fab24e10f810ff..c91263e0b19f2d 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
@@ -2,7 +2,7 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
declare i32 @llvm.amdgcn.workitem.id.x()
-define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) "target-cpu"="gfx906" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
@@ -46,7 +46,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #
}
-define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) "target-cpu"="gfx906" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
@@ -124,7 +124,7 @@ define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) "target-cpu"="gfx906" {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
@@ -168,7 +168,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #
}
-define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) "target-cpu"="gfx906" {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
@@ -245,5 +245,3 @@ define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst
ret void
}
-
-attributes #0 = {"target-cpu"="gfx906"}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
index cc7a45cbb6e374..c76430ce8b3eed 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
@@ -6,7 +6,7 @@
; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
-define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
@@ -35,7 +35,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -117,7 +117,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scop
ret void
}
-define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
@@ -170,7 +170,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_uns
ret void
}
-define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -252,7 +252,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_uns
ret void
}
-define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
+define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
@@ -305,7 +305,7 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_stri
ret void
}
-define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
+define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -387,7 +387,7 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_stri
ret void
}
-define amdgpu_ps void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
+define amdgpu_ps void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -412,7 +412,7 @@ define amdgpu_ps void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
+define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -494,7 +494,7 @@ define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
+define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -539,7 +539,7 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1{
+define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -621,7 +621,7 @@ define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
@@ -674,7 +674,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_str
ret void
}
-define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
+define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) strictfp {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
@@ -756,7 +756,7 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
+define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -765,7 +765,7 @@ define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_ps void @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -774,7 +774,7 @@ define amdgpu_ps void @global_atomic_fadd_div_address_div_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
+define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret void
@@ -783,7 +783,7 @@ define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_one_as_scope_uns
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
+define amdgpu_ps void @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret void
@@ -792,7 +792,7 @@ define amdgpu_ps void @global_atomic_fadd_div_address_div_value_one_as_scope_uns
ret void
}
-define amdgpu_ps void @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
+define amdgpu_ps void @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) strictfp {
; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -801,7 +801,7 @@ define amdgpu_ps void @global_atomic_fsub_div_address_uni_value_agent_scope_stri
ret void
}
-define amdgpu_ps void @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
+define amdgpu_ps void @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) strictfp {
; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -810,7 +810,7 @@ define amdgpu_ps void @global_atomic_fsub_div_address_div_value_agent_scope_stri
ret void
}
-define amdgpu_ps void @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
+define amdgpu_ps void @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -819,7 +819,7 @@ define amdgpu_ps void @global_atomic_fmin_div_address_uni_value_agent_scope(ptr
ret void
}
-define amdgpu_ps void @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
+define amdgpu_ps void @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -828,7 +828,7 @@ define amdgpu_ps void @global_atomic_fmin_div_address_div_value_agent_scope(ptr
ret void
}
-define amdgpu_ps void @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
+define amdgpu_ps void @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -837,7 +837,7 @@ define amdgpu_ps void @global_atomic_fmax_div_address_uni_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
+define amdgpu_ps void @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true"{
; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret void
@@ -846,7 +846,7 @@ define amdgpu_ps void @global_atomic_fmax_div_address_div_value_agent_scope_unsa
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
+define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) strictfp {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret void
@@ -855,7 +855,7 @@ define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_system_scope_str
ret void
}
-define amdgpu_ps void @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
+define amdgpu_ps void @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) strictfp {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret void
@@ -863,7 +863,3 @@ define amdgpu_ps void @global_atomic_fadd_div_address_div_value_system_scope_str
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 96c615b974ce17..38ff1acdcc86f6 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -14,7 +14,7 @@
declare float @div.float.value()
-define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b64 s[2:3], exec
@@ -332,7 +332,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -1510,7 +1510,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -2281,7 +2281,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr) #2{
+define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr) strictfp{
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -2740,7 +2740,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -3460,7 +3460,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -4179,7 +4179,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) strictfp {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_uni_value_defalut_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -4637,7 +4637,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_defalut_scop
ret void
}
-define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) strictfp {
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_defalut_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -5408,9 +5408,5 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_defalut_scop
ret void
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { strictfp}
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 3cc5a4cd1d0aa1..be1a4587b5c65a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -14,7 +14,7 @@
declare float @div.float.value()
-define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -349,7 +349,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -1192,7 +1192,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -1528,7 +1528,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -2372,7 +2372,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_uni_value_defalut_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -2707,7 +2707,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_uni_value_defalut_scop
ret void
}
-define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_defalut_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -3550,7 +3550,5 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_defalut_scop
ret void
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 314c52a71d938f..ac5ff5342010bd 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -14,7 +14,7 @@
declare float @div.float.value()
-define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -349,7 +349,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -1192,7 +1192,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_uni_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -1528,7 +1528,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -2372,7 +2372,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_uni_value_defalut_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
@@ -2707,7 +2707,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_uni_value_defalut_scop
ret void
}
-define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_defalut_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_defalut_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -3550,7 +3550,5 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_defalut_scop
ret void
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index bc9125e326c4d9..b07c5abdfafd3e 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -14,7 +14,7 @@
declare float @div.float.value()
-define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b64 s[2:3], exec
@@ -384,7 +384,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -1155,7 +1155,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -1614,7 +1614,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -2385,7 +2385,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr) #2{
+define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr) strictfp {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -2844,7 +2844,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr) "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -3616,7 +3616,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr) #1 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr) strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -4387,7 +4387,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) strictfp {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_uni_value_defalut_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -4845,7 +4845,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_defalut_scop
ret void
}
-define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) #2 {
+define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scope_strictfp(ptr addrspace(1) %ptr) strictfp {
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_defalut_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
@@ -5616,9 +5616,5 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_defalut_scop
ret void
}
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { strictfp}
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd.ll b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
index 3b71e8ffefbf8c..54311521d124bd 100644
--- a/llvm/test/CodeGen/AMDGPU/global_smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
@@ -47,7 +47,7 @@ bb:
define amdgpu_kernel void @non-uniform_load(ptr addrspace(1) %arg, [8 x i32], ptr addrspace(1) %arg1) #0 {
bb:
- %tmp = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %tmp
%tmp3 = load float, ptr addrspace(1) %tmp2, align 4, !tbaa !8
%tmp4 = fadd float %tmp3, 0.000000e+00
@@ -134,9 +134,7 @@ entry:
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
!8 = !{!9, !9, i64 0}
!9 = !{!"float", !10, i64 0}
diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
index 670666b7824341..9e6c826b63d9e8 100644
--- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
@@ -25,7 +25,7 @@
; CHECK: flat_store_dword
-define amdgpu_kernel void @cfg(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) #0 {
+define amdgpu_kernel void @cfg(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) "target-cpu"="fiji" {
bb:
%tmp = sext i32 %arg2 to i64
%tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
@@ -85,7 +85,7 @@ bb22: ; preds = %bb20, %bb11
; CHECK: flat_load_dword
-define amdgpu_kernel void @cfg_selfloop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) #0 {
+define amdgpu_kernel void @cfg_selfloop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) "target-cpu"="fiji" {
bb:
br label %bb1
@@ -106,9 +106,6 @@ bb1:
br i1 %tmp31, label %bb2, label %bb1
}
-
-attributes #0 = { "target-cpu"="fiji" }
-
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
index 6f1e5b89db8841..d13ba78f43ecc6 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir
@@ -15,13 +15,11 @@
--- |
- define void @use_global_assign() #0 {
+ define void @use_global_assign() "amdgpu-waves-per-eu"="10,10" {
entry:
unreachable
}
- attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-
...
---
name: use_global_assign
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index e2d55990473c09..56aabfe8c6e7d4 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -5,7 +5,7 @@
; half args should be promoted to float for CI and lower.
-define amdgpu_kernel void @load_f16_arg(ptr addrspace(1) %out, half %arg) #0 {
+define amdgpu_kernel void @load_f16_arg(ptr addrspace(1) %out, half %arg) nounwind {
; CI-LABEL: load_f16_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -43,7 +43,7 @@ define amdgpu_kernel void @load_f16_arg(ptr addrspace(1) %out, half %arg) #0 {
ret void
}
-define amdgpu_kernel void @load_v2f16_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @load_v2f16_arg(ptr addrspace(1) %out, <2 x half> %arg) nounwind {
; CI-LABEL: load_v2f16_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -81,7 +81,7 @@ define amdgpu_kernel void @load_v2f16_arg(ptr addrspace(1) %out, <2 x half> %arg
ret void
}
-define amdgpu_kernel void @load_v3f16_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @load_v3f16_arg(ptr addrspace(1) %out, <3 x half> %arg) nounwind {
; CIVI-LABEL: load_v3f16_arg:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -116,7 +116,7 @@ define amdgpu_kernel void @load_v3f16_arg(ptr addrspace(1) %out, <3 x half> %arg
; FIXME: Why not one load?
-define amdgpu_kernel void @load_v4f16_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @load_v4f16_arg(ptr addrspace(1) %out, <4 x half> %arg) nounwind {
; CIVI-LABEL: load_v4f16_arg:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -142,7 +142,7 @@ define amdgpu_kernel void @load_v4f16_arg(ptr addrspace(1) %out, <4 x half> %arg
ret void
}
-define amdgpu_kernel void @load_v8f16_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @load_v8f16_arg(ptr addrspace(1) %out, <8 x half> %arg) nounwind {
; CI-LABEL: load_v8f16_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
@@ -188,7 +188,7 @@ define amdgpu_kernel void @load_v8f16_arg(ptr addrspace(1) %out, <8 x half> %arg
ret void
}
-define amdgpu_kernel void @extload_v2f16_arg(ptr addrspace(1) %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @extload_v2f16_arg(ptr addrspace(1) %out, <2 x half> %in) nounwind {
; CI-LABEL: extload_v2f16_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -234,7 +234,7 @@ define amdgpu_kernel void @extload_v2f16_arg(ptr addrspace(1) %out, <2 x half> %
ret void
}
-define amdgpu_kernel void @extload_f16_to_f32_arg(ptr addrspace(1) %out, half %arg) #0 {
+define amdgpu_kernel void @extload_f16_to_f32_arg(ptr addrspace(1) %out, half %arg) nounwind {
; CI-LABEL: extload_f16_to_f32_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -274,7 +274,7 @@ define amdgpu_kernel void @extload_f16_to_f32_arg(ptr addrspace(1) %out, half %a
ret void
}
-define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(ptr addrspace(1) %out, <2 x half> %arg) nounwind {
; CI-LABEL: extload_v2f16_to_v2f32_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
@@ -320,7 +320,7 @@ define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(ptr addrspace(1) %out, <3 x half> %arg) nounwind {
; CI-LABEL: extload_v3f16_to_v3f32_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -365,7 +365,7 @@ define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(ptr addrspace(1) %out, <3
ret void
}
-define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(ptr addrspace(1) %out, <4 x half> %arg) nounwind {
; CI-LABEL: extload_v4f16_to_v4f32_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -416,7 +416,7 @@ define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(ptr addrspace(1) %out, <4
ret void
}
-define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(ptr addrspace(1) %out, <8 x half> %arg) nounwind {
; CI-LABEL: extload_v8f16_to_v8f32_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -503,7 +503,7 @@ define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(ptr addrspace(1) %out, <8
ret void
}
-define amdgpu_kernel void @extload_f16_to_f64_arg(ptr addrspace(1) %out, half %arg) #0 {
+define amdgpu_kernel void @extload_f16_to_f64_arg(ptr addrspace(1) %out, half %arg) nounwind {
; CI-LABEL: extload_f16_to_f64_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x2
@@ -549,7 +549,7 @@ define amdgpu_kernel void @extload_f16_to_f64_arg(ptr addrspace(1) %out, half %a
ret void
}
-define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(ptr addrspace(1) %out, <2 x half> %arg) nounwind {
; CI-LABEL: extload_v2f16_to_v2f64_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x2
@@ -604,7 +604,7 @@ define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(ptr addrspace(1) %out, <3 x half> %arg) nounwind {
; CI-LABEL: extload_v3f16_to_v3f64_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -672,7 +672,7 @@ define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(ptr addrspace(1) %out, <3
ret void
}
-define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(ptr addrspace(1) %out, <4 x half> %arg) nounwind {
; CI-LABEL: extload_v4f16_to_v4f64_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -751,7 +751,7 @@ define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(ptr addrspace(1) %out, <4
ret void
}
-define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(ptr addrspace(1) %out, <8 x half> %arg) nounwind {
; CI-LABEL: extload_v8f16_to_v8f64_arg:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -886,7 +886,7 @@ define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(ptr addrspace(1) %out, <8
ret void
}
-define amdgpu_kernel void @global_load_store_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_store_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_load_store_f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -916,7 +916,7 @@ define amdgpu_kernel void @global_load_store_f16(ptr addrspace(1) %out, ptr addr
ret void
}
-define amdgpu_kernel void @global_load_store_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_store_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_load_store_v2f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -946,7 +946,7 @@ define amdgpu_kernel void @global_load_store_v2f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @global_load_store_v4f16(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @global_load_store_v4f16(ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; CIVI-LABEL: global_load_store_v4f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -976,7 +976,7 @@ define amdgpu_kernel void @global_load_store_v4f16(ptr addrspace(1) %in, ptr add
ret void
}
-define amdgpu_kernel void @global_load_store_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_store_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_load_store_v8f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1006,7 +1006,7 @@ define amdgpu_kernel void @global_load_store_v8f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @global_extload_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_extload_f16_to_f32:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1039,7 +1039,7 @@ define amdgpu_kernel void @global_extload_f16_to_f32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_extload_v2f16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v2f16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v2f16_to_v2f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1092,7 +1092,7 @@ define amdgpu_kernel void @global_extload_v2f16_to_v2f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v3f16_to_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v3f16_to_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v3f16_to_v3f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1148,7 +1148,7 @@ define amdgpu_kernel void @global_extload_v3f16_to_v3f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v4f16_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v4f16_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v4f16_to_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1209,7 +1209,7 @@ define amdgpu_kernel void @global_extload_v4f16_to_v4f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v8f16_to_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v8f16_to_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v8f16_to_v8f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1297,7 +1297,7 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v16f16_to_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v16f16_to_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v16f16_to_v16f32:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1454,7 +1454,7 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @global_extload_f16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_f16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_extload_f16_to_f64:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1490,7 +1490,7 @@ define amdgpu_kernel void @global_extload_f16_to_f64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_extload_v2f16_to_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v2f16_to_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v2f16_to_v2f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1550,7 +1550,7 @@ define amdgpu_kernel void @global_extload_v2f16_to_v2f64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v3f16_to_v3f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v3f16_to_v3f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v3f16_to_v3f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1628,7 +1628,7 @@ define amdgpu_kernel void @global_extload_v3f16_to_v3f64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v4f16_to_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v4f16_to_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v4f16_to_v4f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1715,7 +1715,7 @@ define amdgpu_kernel void @global_extload_v4f16_to_v4f64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v8f16_to_v8f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v8f16_to_v8f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v8f16_to_v8f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1849,7 +1849,7 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_extload_v16f16_to_v16f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2099,7 +2099,7 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @global_truncstore_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: global_truncstore_f32_to_f16:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2132,7 +2132,7 @@ define amdgpu_kernel void @global_truncstore_f32_to_f16(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_truncstore_v2f32_to_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2187,7 +2187,7 @@ define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_truncstore_v3f32_to_v3f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2257,7 +2257,7 @@ define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_truncstore_v4f32_to_v4f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2322,7 +2322,7 @@ define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_truncstore_v8f32_to_v8f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2422,7 +2422,7 @@ define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: global_truncstore_v16f32_to_v16f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2603,7 +2603,7 @@ define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(ptr addrspace(1) %
}
; FIXME: Unsafe math should fold conversions away
-define amdgpu_kernel void @fadd_f16(ptr addrspace(1) %out, half %a, half %b) #0 {
+define amdgpu_kernel void @fadd_f16(ptr addrspace(1) %out, half %a, half %b) nounwind {
; CI-LABEL: fadd_f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s0, s[4:5], 0x2
@@ -2652,7 +2652,7 @@ define amdgpu_kernel void @fadd_f16(ptr addrspace(1) %out, half %a, half %b) #0
ret void
}
-define amdgpu_kernel void @fadd_v2f16(ptr addrspace(1) %out, <2 x half> %a, <2 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v2f16(ptr addrspace(1) %out, <2 x half> %a, <2 x half> %b) nounwind {
; CI-LABEL: fadd_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2706,7 +2706,7 @@ define amdgpu_kernel void @fadd_v2f16(ptr addrspace(1) %out, <2 x half> %a, <2 x
ret void
}
-define amdgpu_kernel void @fadd_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @fadd_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CI-LABEL: fadd_v4f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2784,7 +2784,7 @@ define amdgpu_kernel void @fadd_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @fadd_v8f16(ptr addrspace(1) %out, <8 x half> %a, <8 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v8f16(ptr addrspace(1) %out, <8 x half> %a, <8 x half> %b) nounwind {
; CI-LABEL: fadd_v8f16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x4
@@ -2905,7 +2905,7 @@ define amdgpu_kernel void @fadd_v8f16(ptr addrspace(1) %out, <8 x half> %a, <8 x
ret void
}
-define amdgpu_kernel void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; CIVI-LABEL: test_bitcast_from_half:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2936,7 +2936,7 @@ define amdgpu_kernel void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addr
ret void
}
-define amdgpu_kernel void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; CIVI-LABEL: test_bitcast_to_half:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2966,5 +2966,3 @@ define amdgpu_kernel void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrs
store half %val_fp, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
index 830a40ff052acc..8d5d91e9ae86cb 100644
--- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
@@ -13,7 +13,7 @@
define amdgpu_kernel void @hoist_cond(ptr addrspace(1) nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, i32 %arg3, i32 %arg4) {
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp5 = icmp ult i32 %tmp, %arg3
br label %bb1
@@ -41,6 +41,4 @@ bb4: ; preds = %bb3
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
index af7b57a9f67bd7..87702759744ced 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -4,7 +4,7 @@
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
+define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "target-cpu"="kaveri" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -14,7 +14,7 @@ define amdgpu_kernel void @test_default_ci(ptr addrspace(1) %out0, ptr addrspace
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #1 {
+define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "target-cpu"="fiji" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -24,7 +24,7 @@ define amdgpu_kernel void @test_default_vi(ptr addrspace(1) %out0, ptr addrspace
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 192
-define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #2 {
+define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -34,7 +34,7 @@ define amdgpu_kernel void @test_f64_denormals(ptr addrspace(1) %out0, ptr addrsp
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 48
-define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #3 {
+define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -44,7 +44,7 @@ define amdgpu_kernel void @test_f32_denormals(ptr addrspace(1) %out0, ptr addrsp
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #4 {
+define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="ieee,ieee" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @test_f32_f64_denormals(ptr addrspace(1) %out0, ptr ad
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 0
-define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #5 {
+define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_no_denormals(ptr addrspace(1) %out0, ptr addrspa
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 1
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #6 {
+define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -74,7 +74,7 @@ define amdgpu_kernel void @test_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr add
; GCN: .amdhsa_dx10_clamp 1
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #7 {
+define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
@@ -84,21 +84,11 @@ define amdgpu_kernel void @test_no_ieee_mode_vi(ptr addrspace(1) %out0, ptr addr
; GCN: .amdhsa_dx10_clamp 0
; GCN: .amdhsa_ieee_mode 0
; GCN: FloatMode: 240
-define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) #8 {
+define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" {
store float 0.0, ptr addrspace(1) %out0
store double 0.0, ptr addrspace(1) %out1
ret void
}
-attributes #0 = { nounwind "target-cpu"="kaveri" }
-attributes #1 = { nounwind "target-cpu"="fiji" }
-attributes #2 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #3 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #4 = { nounwind "denormal-fp-math"="ieee,ieee" }
-attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #6 = { nounwind "amdgpu-dx10-clamp"="false" "target-cpu"="fiji" }
-attributes #7 = { nounwind "amdgpu-ieee"="false" "target-cpu"="fiji" }
-attributes #8 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="false" "target-cpu"="fiji" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
index b6eff8846dc8c7..aa8747a6b35fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll
@@ -15,7 +15,7 @@
; GFX90A: .vgpr_count: 44
; GFX908: .vgpr_count: 32
; GFX801: .vgpr_count: 9
-define amdgpu_kernel void @kernel_32_agprs() #0 {
+define amdgpu_kernel void @kernel_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -29,7 +29,7 @@ bb:
; GFX90A: .vgpr_count: 1
; GFX908: .vgpr_count: 1
; GFX801: .vgpr_count: 1
-define amdgpu_kernel void @kernel_0_agprs() #0 {
+define amdgpu_kernel void @kernel_0_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v0}" ()
ret void
@@ -42,7 +42,7 @@ bb:
; GFX90A: .vgpr_count: 56
; GFX908: .vgpr_count: 40
; GFX801: .vgpr_count: 40
-define amdgpu_kernel void @kernel_40_vgprs() #0 {
+define amdgpu_kernel void @kernel_40_vgprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
@@ -56,14 +56,14 @@ bb:
; GFX90A: .vgpr_count: 512
; GFX908: .vgpr_count: 256
; GFX801: .vgpr_count: 256
-define amdgpu_kernel void @kernel_max_gprs() #0 {
+define amdgpu_kernel void @kernel_max_gprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v255}" ()
call void asm sideeffect "", "~{a255}" ()
ret void
}
-define void @func_32_agprs() #0 {
+define void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -77,9 +77,9 @@ bb:
; GFX90A: .vgpr_count: 44
; GFX908: .vgpr_count: 32
; GFX801: .vgpr_count: 9
-define amdgpu_kernel void @kernel_call_func_32_agprs() #0 {
+define amdgpu_kernel void @kernel_call_func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
- call void @func_32_agprs() #0
+ call void @func_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512"
ret void
}
@@ -92,10 +92,8 @@ declare void @undef_func()
; GFX90A: .vgpr_count: 64
; GFX908: .vgpr_count: 32
; GFX801: .vgpr_count: 32
-define amdgpu_kernel void @kernel_call_undef_func() #0 {
+define amdgpu_kernel void @kernel_call_undef_func() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void @undef_func()
ret void
}
-
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-small.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-small.ll
index 5ec1502899edf1..80cc674dc9f709 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-small.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-small.ll
@@ -33,7 +33,7 @@
; GFX90A: .vgpr_count: 44
; GFX801: .vgpr_count: 9
-define amdgpu_kernel void @kernel_32_agprs() #0 {
+define amdgpu_kernel void @kernel_32_agprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v8}" ()
call void asm sideeffect "", "~{a31}" ()
@@ -47,11 +47,9 @@ bb:
; GFX90A: .vgpr_count: 56
; GFX801: .vgpr_count: 40
-define amdgpu_kernel void @kernel_40_vgprs() #0 {
+define amdgpu_kernel void @kernel_40_vgprs() nounwind noinline "amdgpu-flat-work-group-size"="1,512" {
bb:
call void asm sideeffect "", "~{v39}" ()
call void asm sideeffect "", "~{a15}" ()
ret void
}
-
-attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-func-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-func-hidden-args-v5.ll
index cd46747370ad18..a76159c551937a 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-func-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-func-hidden-args-v5.ll
@@ -104,7 +104,7 @@ define void @funcs_dyn_lds() {
define amdgpu_kernel void @test_v5(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) optnone noinline {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -120,5 +120,3 @@ entry:
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}
-attributes #0 = { optnone noinline }
-
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-funcarg-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-funcarg-hidden-args-v5.ll
index 2fe96975bb92e2..30bd69bdfa01f0 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-funcarg-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-funcarg-hidden-args-v5.ll
@@ -104,7 +104,7 @@ define void @funcs_dyn_lds(ptr addrspace(3) %lds_ptr) {
define amdgpu_kernel void @test_v5(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) optnone noinline {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -120,5 +120,3 @@ entry:
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}
-attributes #0 = { optnone noinline }
-
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-kernarg-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-kernarg-hidden-args-v5.ll
index b3ed362052bb4c..4663a8481b360d 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-kernarg-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-dynlds-kernarg-hidden-args-v5.ll
@@ -105,7 +105,7 @@ define amdgpu_kernel void @test_v5(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(3) %lds_ptr) #0 {
+ ptr addrspace(3) %lds_ptr) optnone noinline {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -121,5 +121,3 @@ entry:
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}
-attributes #0 = { optnone noinline }
-
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
index 3d0e061d33286e..c59c68a165f3d9 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
@@ -26,7 +26,7 @@
; CHECK-NEXT: - 0
; CHECK: .name: test_non_enqueue_kernel_caller
; CHECK: .symbol: test_non_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
@@ -62,7 +62,7 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_enqueue_kernel_caller
; CHECK: .symbol: test_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) optnone noinline "amdgpu-implicitarg-num-bytes"="48"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
@@ -98,7 +98,7 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
; CHECK-NEXT: - 0
; CHECK: .name: test_no_completion_action
; CHECK: .symbol: test_no_completion_action.kd
-define amdgpu_kernel void @test_no_completion_action(i8 %a) #2
+define amdgpu_kernel void @test_no_completion_action(i8 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
@@ -134,7 +134,7 @@ define amdgpu_kernel void @test_no_completion_action(i8 %a) #2
; CHECK-NEXT: - 0
; CHECK: .name: test_no_default_queue
; CHECK: .symbol: test_no_default_queue.kd
-define amdgpu_kernel void @test_no_default_queue(i8 %a) #3
+define amdgpu_kernel void @test_no_default_queue(i8 %a) optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
@@ -145,11 +145,6 @@ define amdgpu_kernel void @test_no_default_queue(i8 %a) #3
; CHECK-NEXT: - 1
; CHECK-NOT: amdhsa.printf:
-attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" }
-attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" }
-attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index 28246d7f9e6fb2..708b68ce0bea76 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -46,7 +46,7 @@
; CHECK-NEXT: - 0
; CHECK: .name: test_char
; CHECK: .symbol: test_char.kd
-define amdgpu_kernel void @test_char(i8 %a) #0
+define amdgpu_kernel void @test_char(i8 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
@@ -80,7 +80,7 @@ define amdgpu_kernel void @test_char(i8 %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_char_byref_constant
; CHECK: .symbol: test_char_byref_constant.kd
-define amdgpu_kernel void @test_char_byref_constant(ptr addrspace(4) byref(i8) %a) #0
+define amdgpu_kernel void @test_char_byref_constant(ptr addrspace(4) byref(i8) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @test_char_byref_constant(ptr addrspace(4) byref(i8) %
; CHECK-NEXT: - 0
; CHECK: .name: test_char_byref_constant_align512
; CHECK: .symbol: test_char_byref_constant_align512.kd
-define amdgpu_kernel void @test_char_byref_constant_align512(i8, ptr addrspace(4) byref(i8) align(512) %a) #0
+define amdgpu_kernel void @test_char_byref_constant_align512(i8, ptr addrspace(4) byref(i8) align(512) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !111
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
@@ -157,7 +157,7 @@ define amdgpu_kernel void @test_char_byref_constant_align512(i8, ptr addrspace(4
; CHECK-NEXT: - 0
; CHECK: .name: test_ushort2
; CHECK: .symbol: test_ushort2.kd
-define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
!kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
ret void
@@ -196,7 +196,7 @@ define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_int3
; CHECK: .symbol: test_int3.kd
-define amdgpu_kernel void @test_int3(<3 x i32> %a) #0
+define amdgpu_kernel void @test_int3(<3 x i32> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
!kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
ret void
@@ -235,7 +235,7 @@ define amdgpu_kernel void @test_int3(<3 x i32> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_ulong4
; CHECK: .symbol: test_ulong4.kd
-define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
!kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
ret void
@@ -274,7 +274,7 @@ define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_half8
; CHECK: .symbol: test_half8.kd
-define amdgpu_kernel void @test_half8(<8 x half> %a) #0
+define amdgpu_kernel void @test_half8(<8 x half> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
!kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
ret void
@@ -313,7 +313,7 @@ define amdgpu_kernel void @test_half8(<8 x half> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_float16
; CHECK: .symbol: test_float16.kd
-define amdgpu_kernel void @test_float16(<16 x float> %a) #0
+define amdgpu_kernel void @test_float16(<16 x float> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
!kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
ret void
@@ -352,7 +352,7 @@ define amdgpu_kernel void @test_float16(<16 x float> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_double16
; CHECK: .symbol: test_double16.kd
-define amdgpu_kernel void @test_double16(<16 x double> %a) #0
+define amdgpu_kernel void @test_double16(<16 x double> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
!kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
ret void
@@ -392,7 +392,7 @@ define amdgpu_kernel void @test_double16(<16 x double> %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_pointer
; CHECK: .symbol: test_pointer.kd
-define amdgpu_kernel void @test_pointer(ptr addrspace(1) %a) #0
+define amdgpu_kernel void @test_pointer(ptr addrspace(1) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
!kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
ret void
@@ -431,7 +431,7 @@ define amdgpu_kernel void @test_pointer(ptr addrspace(1) %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_image
; CHECK: .symbol: test_image.kd
-define amdgpu_kernel void @test_image(ptr addrspace(1) %a) #0
+define amdgpu_kernel void @test_image(ptr addrspace(1) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
!kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
ret void
@@ -470,7 +470,7 @@ define amdgpu_kernel void @test_image(ptr addrspace(1) %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_sampler
; CHECK: .symbol: test_sampler.kd
-define amdgpu_kernel void @test_sampler(i32 %a) #0
+define amdgpu_kernel void @test_sampler(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
!kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
ret void
@@ -509,7 +509,7 @@ define amdgpu_kernel void @test_sampler(i32 %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_queue
; CHECK: .symbol: test_queue.kd
-define amdgpu_kernel void @test_queue(ptr addrspace(1) %a) #0
+define amdgpu_kernel void @test_queue(ptr addrspace(1) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
!kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
ret void
@@ -548,7 +548,7 @@ define amdgpu_kernel void @test_queue(ptr addrspace(1) %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_struct
; CHECK: .symbol: test_struct.kd
-define amdgpu_kernel void @test_struct(%struct.A %a) #0
+define amdgpu_kernel void @test_struct(%struct.A %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -587,7 +587,7 @@ define amdgpu_kernel void @test_struct(%struct.A %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_struct_byref_constant
; CHECK: .symbol: test_struct_byref_constant.kd
-define amdgpu_kernel void @test_struct_byref_constant(ptr addrspace(4) byref(%struct.A) %a) #0
+define amdgpu_kernel void @test_struct_byref_constant(ptr addrspace(4) byref(%struct.A) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -626,7 +626,7 @@ define amdgpu_kernel void @test_struct_byref_constant(ptr addrspace(4) byref(%st
; CHECK-NEXT: - 0
; CHECK: .name: test_array
; CHECK: .symbol: test_array.kd
-define amdgpu_kernel void @test_array([32 x i8] %a) #0
+define amdgpu_kernel void @test_array([32 x i8] %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -665,7 +665,7 @@ define amdgpu_kernel void @test_array([32 x i8] %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_array_byref_constant
; CHECK: .symbol: test_array_byref_constant.kd
-define amdgpu_kernel void @test_array_byref_constant(ptr addrspace(4) byref([32 x i8]) %a) #0
+define amdgpu_kernel void @test_array_byref_constant(ptr addrspace(4) byref([32 x i8]) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
!kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
ret void
@@ -704,7 +704,7 @@ define amdgpu_kernel void @test_array_byref_constant(ptr addrspace(4) byref([32
; CHECK-NEXT: - 0
; CHECK: .name: test_i128
; CHECK: .symbol: test_i128.kd
-define amdgpu_kernel void @test_i128(i128 %a) #0
+define amdgpu_kernel void @test_i128(i128 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
!kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
ret void
@@ -753,7 +753,7 @@ define amdgpu_kernel void @test_i128(i128 %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_multi_arg
; CHECK: .symbol: test_multi_arg.kd
-define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
!kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
ret void
@@ -808,7 +808,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
; CHECK: .symbol: test_addr_space.kd
define amdgpu_kernel void @test_addr_space(ptr addrspace(1) %g,
ptr addrspace(4) %c,
- ptr addrspace(3) align 4 %l) #0
+ ptr addrspace(3) align 4 %l) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
ret void
@@ -865,7 +865,7 @@ define amdgpu_kernel void @test_addr_space(ptr addrspace(1) %g,
; CHECK: .symbol: test_type_qual.kd
define amdgpu_kernel void @test_type_qual(ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0
+ ptr addrspace(1) %c) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
!kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
ret void
@@ -919,7 +919,7 @@ define amdgpu_kernel void @test_type_qual(ptr addrspace(1) %a,
; CHECK: .symbol: test_access_qual.kd
define amdgpu_kernel void @test_access_qual(ptr addrspace(1) %ro,
ptr addrspace(1) %wo,
- ptr addrspace(1) %rw) #0
+ ptr addrspace(1) %rw) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
!kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
ret void
@@ -959,7 +959,7 @@ define amdgpu_kernel void @test_access_qual(ptr addrspace(1) %ro,
; CHECK: .name: test_vec_type_hint_half
; CHECK: .symbol: test_vec_type_hint_half.kd
; CHECK: .vec_type_hint: half
-define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
ret void
@@ -999,7 +999,7 @@ define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0
; CHECK: .name: test_vec_type_hint_float
; CHECK: .symbol: test_vec_type_hint_float.kd
; CHECK: .vec_type_hint: float
-define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
ret void
@@ -1039,7 +1039,7 @@ define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0
; CHECK: .name: test_vec_type_hint_double
; CHECK: .symbol: test_vec_type_hint_double.kd
; CHECK: .vec_type_hint: double
-define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
ret void
@@ -1079,7 +1079,7 @@ define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0
; CHECK: .name: test_vec_type_hint_char
; CHECK: .symbol: test_vec_type_hint_char.kd
; CHECK: .vec_type_hint: char
-define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
ret void
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0
; CHECK: .name: test_vec_type_hint_short
; CHECK: .symbol: test_vec_type_hint_short.kd
; CHECK: .vec_type_hint: short
-define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
ret void
@@ -1159,7 +1159,7 @@ define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0
; CHECK: .name: test_vec_type_hint_long
; CHECK: .symbol: test_vec_type_hint_long.kd
; CHECK: .vec_type_hint: long
-define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
ret void
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0
; CHECK: .name: test_vec_type_hint_unknown
; CHECK: .symbol: test_vec_type_hint_unknown.kd
; CHECK: .vec_type_hint: unknown
-define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0
+define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
ret void
@@ -1243,7 +1243,7 @@ define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0
; CHECK-NEXT: - 4
; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd
; CHECK: .vec_type_hint: int
-define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
!reqd_work_group_size !6 {
@@ -1288,7 +1288,7 @@ define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0
; CHECK-NEXT: - 8
; CHECK-NEXT: - 16
; CHECK-NEXT: - 32
-define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
!work_group_size_hint !8 {
@@ -1329,7 +1329,7 @@ define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_arg_ptr_to_ptr
; CHECK: .symbol: test_arg_ptr_to_ptr.kd
-define amdgpu_kernel void @test_arg_ptr_to_ptr(ptr addrspace(1) %a) #0
+define amdgpu_kernel void @test_arg_ptr_to_ptr(ptr addrspace(1) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
!kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
ret void
@@ -1368,7 +1368,7 @@ define amdgpu_kernel void @test_arg_ptr_to_ptr(ptr addrspace(1) %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_arg_struct_contains_ptr
; CHECK: .symbol: test_arg_struct_contains_ptr.kd
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B %a) #0
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
!kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
ret void
@@ -1407,7 +1407,7 @@ define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B %a) #0
; CHECK-NEXT: - 0
; CHECK: .name: test_arg_vector_of_ptr
; CHECK: .symbol: test_arg_vector_of_ptr.kd
-define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x ptr addrspace(1)> %a) #0
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x ptr addrspace(1)> %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
!kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
ret void
@@ -1448,7 +1448,7 @@ define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x ptr addrspace(1)> %a) #0
; CHECK: .name: test_arg_unknown_builtin_type
; CHECK: .symbol: test_arg_unknown_builtin_type.kd
define amdgpu_kernel void @test_arg_unknown_builtin_type(
- ptr addrspace(1) %a) #0
+ ptr addrspace(1) %a) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
!kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
ret void
@@ -1543,7 +1543,7 @@ define amdgpu_kernel void @test_pointee_align(ptr addrspace(1) %a,
ptr addrspace(3) align 4 %e,
ptr addrspace(3) align 8 %f,
ptr addrspace(3) align 16 %g,
- ptr addrspace(3) %h) #0
+ ptr addrspace(3) %h) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
!kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
ret void
@@ -1638,7 +1638,7 @@ define amdgpu_kernel void @test_pointee_align_attribute(ptr addrspace(1) align 1
ptr addrspace(3) align 256 %e,
ptr addrspace(3) align 128 %f,
ptr addrspace(3) align 1024 %g,
- ptr addrspace(3) align 16 %h) #0
+ ptr addrspace(3) align 16 %h) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
!kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
ret void
@@ -1678,7 +1678,7 @@ define amdgpu_kernel void @test_pointee_align_attribute(ptr addrspace(1) align 1
; CHECK: .name: __test_block_invoke_kernel
; CHECK: .symbol: __test_block_invoke_kernel.kd
define amdgpu_kernel void @__test_block_invoke_kernel(
- <{ i32, i32, ptr, ptr addrspace(1), i8 }> %arg) #1
+ <{ i32, i32, ptr, ptr addrspace(1), i8 }> %arg) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
!kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
ret void
@@ -1717,7 +1717,7 @@ define amdgpu_kernel void @__test_block_invoke_kernel(
; CHECK-NEXT: - 0
; CHECK: .name: test_enqueue_kernel_caller
; CHECK: .symbol: test_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) optnone noinline "amdgpu-implicitarg-num-bytes"="56"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
ret void
@@ -1730,7 +1730,7 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2
; CHECK-NEXT: .value_kind: global_buffer
; CHECK: .name: unknown_addrspace_kernarg
; CHECK: .symbol: unknown_addrspace_kernarg.kd
-define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) #0 {
+define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr) optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" {
ret void
}
@@ -1741,10 +1741,6 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr)
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
-attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" }
-attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
-attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
index 6a49eac134a67b..13f97812c05e9d 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
@@ -3,22 +3,22 @@
declare void @function1()
-declare void @function2() #0
+declare void @function2() "amdgpu-no-heap-ptr"
; Function Attrs: noinline
-define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
+define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) noinline {
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
ret void
}
; Function Attrs: noinline
-define void @function4(i64 %arg, ptr %a) #2 {
+define void @function4(i64 %arg, ptr %a) noinline {
store i64 %arg, ptr %a
ret void
}
; Function Attrs: noinline
-define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
+define void @function5(ptr addrspace(4) %ptr, ptr %sink) noinline {
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 64
%x = load i64, ptr addrspace(4) %gep
store i64 %x, ptr %sink
@@ -26,7 +26,7 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
; CHECK: amdhsa.kernels:
; CHECK: - .args:
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_kernel20(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel21
-define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel21(ptr %a) "amdgpu-no-heap-ptr" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_kernel40(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel41
-define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel41(ptr %a) "amdgpu-no-heap-ptr" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
%x = load i64, ptr addrspace(4) %gep
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_kernel52(ptr %a) {
; CHECK: - .args:
; CHECK: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel60
-define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel60(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96
%x = load i64, ptr addrspace(4) %gep
@@ -246,7 +246,7 @@ define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel61
-define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel61(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
call void @function5(ptr addrspace(4) %gep, ptr %a)
@@ -258,7 +258,7 @@ define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel70
-define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
@@ -270,7 +270,7 @@ define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel71
-define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
@@ -282,16 +282,12 @@ define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_heap_v1
; CHECK-LABEL: .name: test_kernel72
-define amdgpu_kernel void @test_kernel72() #2 {
+define amdgpu_kernel void @test_kernel72() noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
ret void
}
-attributes #0 = { "amdgpu-no-heap-ptr" }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
index ccdcb523ef0bc0..c9a54aeb1b465e 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
@@ -59,7 +59,7 @@ entry:
define amdgpu_kernel void @test8(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="8" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -95,7 +95,7 @@ entry:
define amdgpu_kernel void @test16(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #1 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="16" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -134,7 +134,7 @@ entry:
define amdgpu_kernel void @test24(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #2 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="24" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -176,7 +176,7 @@ entry:
define amdgpu_kernel void @test32(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #3 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="32" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -224,7 +224,7 @@ entry:
define amdgpu_kernel void @test48(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #4 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="48" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -275,7 +275,7 @@ entry:
define amdgpu_kernel void @test56(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #5 {
+ ptr addrspace(1) %b) optnone noinline "amdgpu-implicitarg-num-bytes"="56" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -290,12 +290,6 @@ entry:
; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to
; avoid optimizing out the implicit argument allocation.
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
-attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
-attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
-attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
-attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
index e10f050b8e7a6c..c7817bb4a7ffdb 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
@@ -104,7 +104,7 @@
define amdgpu_kernel void @test_v5(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) optnone noinline {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -120,5 +120,3 @@ entry:
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}
-attributes #0 = { optnone noinline }
-
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
index 48988a8aead8ad..bcbef684ceb7cb 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
@@ -27,7 +27,7 @@
; CHECK: .name: test_kernel
; CHECK: .symbol: test_kernel.kd
-define amdgpu_kernel void @test_kernel(i8 %a) #0
+define amdgpu_kernel void @test_kernel(i8 %a) sanitize_address "amdgpu-implicitarg-num-bytes"="48"
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
ret void
@@ -37,8 +37,6 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
-attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
!1 = !{i32 0}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
index 6f4c8911efd33b..05e11c67dc8d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
@@ -3,22 +3,22 @@
declare void @function1()
-declare void @function2() #0
+declare void @function2() "amdgpu-no-hostcall-ptr"
; Function Attrs: noinline
-define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #4 {
+define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) noinline {
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
ret void
}
; Function Attrs: noinline
-define void @function4(i64 %arg, ptr %a) #4 {
+define void @function4(i64 %arg, ptr %a) noinline {
store i64 %arg, ptr %a
ret void
}
; Function Attrs: noinline
-define void @function5(ptr addrspace(4) %ptr, ptr %sink) #4 {
+define void @function5(ptr addrspace(4) %ptr, ptr %sink) noinline {
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 8
%x = load i64, ptr addrspace(4) %gep
store i64 %x, ptr %sink
@@ -26,13 +26,13 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #4 {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
; CHECK: amdhsa.kernels:
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel10
-define amdgpu_kernel void @test_kernel10(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel10(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
store i8 3, ptr %a, align 1
ret void
}
@@ -42,7 +42,7 @@ define amdgpu_kernel void @test_kernel10(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel20
-define amdgpu_kernel void @test_kernel20(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel20(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_kernel20(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel21
-define amdgpu_kernel void @test_kernel21(ptr %a) #3 {
+define amdgpu_kernel void @test_kernel21(ptr %a) "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_kernel21(ptr %a) #3 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel22
-define amdgpu_kernel void @test_kernel22(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel22(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
call void @function2()
store i8 3, ptr %a, align 1
ret void
@@ -75,7 +75,7 @@ define amdgpu_kernel void @test_kernel22(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel30
-define amdgpu_kernel void @test_kernel30(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel30(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
%x = load i128, ptr addrspace(4) %gep
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_kernel30(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel40
-define amdgpu_kernel void @test_kernel40(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel40(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
%x = load i64, ptr addrspace(4) %gep
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_kernel40(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel41
-define amdgpu_kernel void @test_kernel41(ptr %a) #3 {
+define amdgpu_kernel void @test_kernel41(ptr %a) "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
%x = load i64, ptr addrspace(4) %gep
@@ -114,7 +114,7 @@ define amdgpu_kernel void @test_kernel41(ptr %a) #3 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel42
-define amdgpu_kernel void @test_kernel42(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel42(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
%x = load i64, ptr addrspace(4) %gep
@@ -127,7 +127,7 @@ define amdgpu_kernel void @test_kernel42(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel43
-define amdgpu_kernel void @test_kernel43(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel43(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
%x = load i64, ptr addrspace(4) %gep
@@ -140,7 +140,7 @@ define amdgpu_kernel void @test_kernel43(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel44
-define amdgpu_kernel void @test_kernel44(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel44(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 23
%x = load i8, ptr addrspace(4) %gep, align 1
@@ -153,7 +153,7 @@ define amdgpu_kernel void @test_kernel44(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel45
-define amdgpu_kernel void @test_kernel45(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel45(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
%x = load i8, ptr addrspace(4) %gep, align 1
@@ -166,7 +166,7 @@ define amdgpu_kernel void @test_kernel45(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel46
-define amdgpu_kernel void @test_kernel46(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel46(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 31
%x = load i8, ptr addrspace(4) %gep, align 1
@@ -179,7 +179,7 @@ define amdgpu_kernel void @test_kernel46(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel47
-define amdgpu_kernel void @test_kernel47(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel47(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
%x = load i8, ptr addrspace(4) %gep, align 1
@@ -192,7 +192,7 @@ define amdgpu_kernel void @test_kernel47(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel50
-define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) #2 {
+define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 %b
%x = load i8, ptr addrspace(4) %gep, align 1
@@ -205,7 +205,7 @@ define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel51
-define amdgpu_kernel void @test_kernel51(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel51(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
%gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 8
@@ -219,7 +219,7 @@ define amdgpu_kernel void @test_kernel51(ptr %a) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel52
-define amdgpu_kernel void @test_kernel52(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel52(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
%gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 16
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_kernel52(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel60
-define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel60(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 24
%x = load i64, ptr addrspace(4) %gep
@@ -246,7 +246,7 @@ define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel61
-define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel61(ptr %a) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
call void @function5(ptr addrspace(4) %gep, ptr %a)
@@ -258,7 +258,7 @@ define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel70
-define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
@@ -270,7 +270,7 @@ define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel71
-define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
@@ -282,18 +282,12 @@ define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel72
-define amdgpu_kernel void @test_kernel72() #2 {
+define amdgpu_kernel void @test_kernel72() "amdgpu-implicitarg-num-bytes"="48" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
ret void
}
-attributes #0 = { "amdgpu-no-hostcall-ptr" }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" }
-attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
-attributes #4 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
index 01f8fbfd76314a..abe61cd6b2a656 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
@@ -3,22 +3,22 @@
declare void @function1()
-declare void @function2() #0
+declare void @function2() "amdgpu-no-hostcall-ptr"
; Function Attrs: noinline
-define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
+define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) noinline {
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
ret void
}
; Function Attrs: noinline
-define void @function4(i64 %arg, ptr %a) #2 {
+define void @function4(i64 %arg, ptr %a) noinline {
store i64 %arg, ptr %a
ret void
}
; Function Attrs: noinline
-define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
+define void @function5(ptr addrspace(4) %ptr, ptr %sink) noinline {
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 64
%x = load i64, ptr addrspace(4) %gep
store i64 %x, ptr %sink
@@ -26,7 +26,7 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
; CHECK: amdhsa.kernels:
; CHECK: - .args:
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_kernel20(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel21
-define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel21(ptr %a) "amdgpu-no-hostcall-ptr" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_kernel40(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel41
-define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel41(ptr %a) "amdgpu-no-hostcall-ptr" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 80
%x = load i64, ptr addrspace(4) %gep
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_kernel52(ptr %a) {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel60
-define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel60(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 80
%x = load i64, ptr addrspace(4) %gep
@@ -246,7 +246,7 @@ define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel61
-define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel61(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
call void @function5(ptr addrspace(4) %gep, ptr %a)
@@ -258,7 +258,7 @@ define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel70
-define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
@@ -270,7 +270,7 @@ define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel71
-define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
@@ -282,16 +282,12 @@ define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_hostcall_buffer
; CHECK-LABEL: .name: test_kernel72
-define amdgpu_kernel void @test_kernel72() #2 {
+define amdgpu_kernel void @test_kernel72() noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
ret void
}
-attributes #0 = { "amdgpu-no-hostcall-ptr" }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
index 7a9f4ae8a20fae..d12c8e397519a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
@@ -37,7 +37,7 @@ entry:
define amdgpu_kernel void @test_max_flat_workgroup_size(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #2 {
+ ptr addrspace(1) %b) "amdgpu-flat-work-group-size"="1,256" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -64,7 +64,7 @@ define amdgpu_kernel void @num_spilled_sgprs(
i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32],
i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32],
i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32],
- i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
+ i32 %inc, i32 %ind, i32 %ine, i32 %inf) "amdgpu-num-sgpr"="14" {
entry:
store i32 %in0, ptr addrspace(1) %out0
store i32 %in1, ptr addrspace(1) %out1
@@ -88,7 +88,7 @@ entry:
; CHECK: .name: num_spilled_vgprs
; CHECK: .symbol: num_spilled_vgprs.kd
; CHECK: .vgpr_spill_count: {{13|14}}
-define amdgpu_kernel void @num_spilled_vgprs() #1 {
+define amdgpu_kernel void @num_spilled_vgprs() "amdgpu-num-vgpr"="20" {
%val0 = load volatile float, ptr addrspace(1) @var
%val1 = load volatile float, ptr addrspace(1) @var
%val2 = load volatile float, ptr addrspace(1) @var
@@ -160,9 +160,5 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 {
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
-attributes #0 = { "amdgpu-num-sgpr"="14" }
-attributes #1 = { "amdgpu-num-vgpr"="20" }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
index 689619227b8d70..836c11d37b88cf 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
@@ -3,22 +3,22 @@
declare void @function1()
-declare void @function2() #0
+declare void @function2() "amdgpu-no-multigrid-sync-arg"
; Function Attrs: noinline
-define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
+define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) noinline {
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
ret void
}
; Function Attrs: noinline
-define void @function4(i64 %arg, ptr %a) #2 {
+define void @function4(i64 %arg, ptr %a) noinline {
store i64 %arg, ptr %a
ret void
}
; Function Attrs: noinline
-define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
+define void @function5(ptr addrspace(4) %ptr, ptr %sink) noinline {
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 72
%x = load i64, ptr addrspace(4) %gep
store i64 %x, ptr %sink
@@ -26,7 +26,7 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
; CHECK: amdhsa.kernels:
; CHECK: - .args:
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_kernel20(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel21
-define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel21(ptr %a) "amdgpu-no-multigrid-sync-arg" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_kernel40(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel41
-define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel41(ptr %a) "amdgpu-no-multigrid-sync-arg" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
%x = load i64, ptr addrspace(4) %gep
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_kernel52(ptr %a) {
; CHECK: - .args:
; CHECK: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel60
-define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel60(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88
%x = load i64, ptr addrspace(4) %gep
@@ -246,7 +246,7 @@ define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel61
-define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel61(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16
call void @function5(ptr addrspace(4) %gep, ptr %a)
@@ -258,7 +258,7 @@ define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel70
-define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
@@ -270,7 +270,7 @@ define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel71
-define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
@@ -282,16 +282,12 @@ define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: test_kernel72
-define amdgpu_kernel void @test_kernel72() #2 {
+define amdgpu_kernel void @test_kernel72() noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
ret void
}
-attributes #0 = { "amdgpu-no-multigrid-sync-arg" }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
index cf26a427aec324..b84aae0d852bc4 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
@@ -3,22 +3,22 @@
declare void @function1()
-declare void @function2() #0
+declare void @function2() "amdgpu-no-queue-ptr"
; Function Attrs: noinline
-define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 {
+define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) noinline {
store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8
ret void
}
; Function Attrs: noinline
-define void @function4(i64 %arg, ptr %a) #2 {
+define void @function4(i64 %arg, ptr %a) noinline {
store i64 %arg, ptr %a
ret void
}
; Function Attrs: noinline
-define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
+define void @function5(ptr addrspace(4) %ptr, ptr %sink) noinline {
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 168
%x = load i64, ptr addrspace(4) %gep
store i64 %x, ptr %sink
@@ -26,7 +26,7 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
; CHECK: amdhsa.kernels:
; CHECK: - .args:
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_kernel20(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel21
-define amdgpu_kernel void @test_kernel21(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel21(ptr %a) "amdgpu-no-queue-ptr" {
call void @function1()
store i8 3, ptr %a, align 1
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_kernel40(ptr %a) {
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel41
-define amdgpu_kernel void @test_kernel41(ptr %a) #0 {
+define amdgpu_kernel void @test_kernel41(ptr %a) "amdgpu-no-queue-ptr" {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 200
%x = load i64, ptr addrspace(4) %gep
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_kernel52(ptr %a) {
; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel60
-define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel60(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 200
%x = load i64, ptr addrspace(4) %gep
@@ -246,7 +246,7 @@ define amdgpu_kernel void @test_kernel60(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel61
-define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
+define amdgpu_kernel void @test_kernel61(ptr %a) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32
call void @function5(ptr addrspace(4) %gep, ptr %a)
@@ -258,7 +258,7 @@ define amdgpu_kernel void @test_kernel61(ptr %a) #2 {
; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel70
-define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8
@@ -270,7 +270,7 @@ define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel71
-define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
+define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink)
@@ -282,16 +282,12 @@ define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 {
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: test_kernel72
-define amdgpu_kernel void @test_kernel72() #2 {
+define amdgpu_kernel void @test_kernel72() noinline {
%ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42
store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8
ret void
}
-attributes #0 = { "amdgpu-no-queue-ptr" }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
index 7986368e2a3584..2c854bb95b3119 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll
@@ -38,7 +38,7 @@ define amdgpu_kernel void @test1(ptr %x) {
ret void
}
-define internal float @f(float %arg0) #0 {
+define internal float @f(float %arg0) norecurse {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 3.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -104,7 +104,7 @@ define amdgpu_kernel void @test3() {
ret void
}
-declare void @g() #0
+declare void @g() norecurse
; test a kernel without an external call that occurs after its callee in the module
; CHECK-LABEL: test4
@@ -134,7 +134,5 @@ define amdgpu_kernel void @test4() {
ret void
}
-attributes #0 = { norecurse }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll
index d1152b8ae7de01..7afa60f97e8ee4 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-uniform-workgroup-size-v5.ll
@@ -6,7 +6,7 @@
; CHECK: - .args:
; CHECK-LABEL: .name: kernel_uniform_workgroup
; CHECK: .uniform_work_group_size: 1
-define amdgpu_kernel void @kernel_uniform_workgroup() #0 {
+define amdgpu_kernel void @kernel_uniform_workgroup() "uniform-work-group-size"="true" {
bb:
ret void
}
@@ -14,7 +14,7 @@ bb:
; CHECK: - .args:
; CHECK-LABEL: .name: kernel_non_uniform_workgroup
; CHECK-NOT: .uniform_work_group_size:
-define amdgpu_kernel void @kernel_non_uniform_workgroup() #1 {
+define amdgpu_kernel void @kernel_non_uniform_workgroup() "uniform-work-group-size"="false" {
bb:
ret void
}
@@ -26,8 +26,6 @@ define amdgpu_kernel void @kernel_no_attr() {
bb:
ret void
}
-attributes #0 = { "uniform-work-group-size"="true" }
-attributes #1 = { "uniform-work-group-size"="false" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index f34f9f38feeb4a..e6b2b1038fbd9c 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -80,8 +80,5 @@ end:
ret void
}
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
-attributes #0 = { nounwind memory(argmem: read) }
-attributes #1 = { nounwind inaccessiblememonly }
+declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind memory(argmem: read)
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
index 338bea9d4f73f2..187f9a186b2aac 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -5,7 +5,7 @@
; SI-LABEL: {{^}}br_implicit_def:
; SI: %bb.0:
; SI-NEXT: s_cbranch_scc1
-define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg) #0 {
+define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg) nounwind {
bb:
br i1 undef, label %bb1, label %bb2
@@ -16,5 +16,3 @@ bb1:
bb2:
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
index 856601ec7c6159..373d0d46592e93 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
@@ -13,7 +13,7 @@
; GCN: [[PREEXIT]]:
; GCN: [[EXIT]]:
-define amdgpu_vs float @test_dont_clobber_scc(i32 inreg %uni, i32 inreg %uni2) #0 {
+define amdgpu_vs float @test_dont_clobber_scc(i32 inreg %uni, i32 inreg %uni2) nounwind {
entry:
%cc.uni = icmp eq i32 %uni, 0
br i1 %cc.uni, label %exit, label %blocka
@@ -33,5 +33,3 @@ exit:
%r = select i1 %cc.phi, float 1.0, float 2.0
ret float %r
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
index 80aa6ee0ab103f..de0a56d66a38db 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -34,9 +34,7 @@ bb6: ; preds = %bb4, %bb3
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Make sure this won't crash.
; SI-LABEL: {{^}}vcopy_i1_undef
diff --git a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
index c1a074a81b2aa2..d2efae577da5c9 100644
--- a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
@@ -8,7 +8,7 @@
; GCN-LABEL: {{^}}i16_eq:
; VI: v_cmp_eq_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_eq(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -26,7 +26,7 @@ entry:
; GCN-LABEL: {{^}}i16_ne:
; VI: v_cmp_ne_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_ne(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -44,7 +44,7 @@ entry:
; GCN-LABEL: {{^}}i16_ugt:
; VI: v_cmp_gt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_ugt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -62,7 +62,7 @@ entry:
; GCN-LABEL: {{^}}i16_uge:
; VI: v_cmp_ge_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_uge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -80,7 +80,7 @@ entry:
; GCN-LABEL: {{^}}i16_ult:
; VI: v_cmp_lt_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_ult(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -98,7 +98,7 @@ entry:
; GCN-LABEL: {{^}}i16_ule:
; VI: v_cmp_le_u16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_ule(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -117,7 +117,7 @@ entry:
; GCN-LABEL: {{^}}i16_sgt:
; VI: v_cmp_gt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_sgt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -135,7 +135,7 @@ entry:
; GCN-LABEL: {{^}}i16_sge:
; VI: v_cmp_ge_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_sge(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -153,7 +153,7 @@ entry:
; GCN-LABEL: {{^}}i16_slt:
; VI: v_cmp_lt_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_slt(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -171,7 +171,7 @@ entry:
; GCN-LABEL: {{^}}i16_sle:
; VI: v_cmp_le_i16_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @i16_sle(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -190,7 +190,7 @@ entry:
; GCN-LABEL: {{^}}i16_eq_v_s:
; VI: v_cmp_eq_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_eq_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_eq_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -206,7 +206,7 @@ entry:
; GCN-LABEL: {{^}}i16_ne_v_s:
; VI: v_cmp_ne_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ne_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_ne_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -222,7 +222,7 @@ entry:
; GCN-LABEL: {{^}}i16_ugt_v_s:
; VI: v_cmp_lt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_ugt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -238,7 +238,7 @@ entry:
; GCN-LABEL: {{^}}i16_uge_v_s:
; VI: v_cmp_le_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_uge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -254,7 +254,7 @@ entry:
; GCN-LABEL: {{^}}i16_ult_v_s:
; VI: v_cmp_gt_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_ult_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -270,7 +270,7 @@ entry:
; GCN-LABEL: {{^}}i16_ule_v_s:
; VI: v_cmp_ge_u16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_ule_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -286,7 +286,7 @@ entry:
; GCN-LABEL: {{^}}i16_sgt_v_s:
; VI: v_cmp_lt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_lt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_sgt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -302,7 +302,7 @@ entry:
; GCN-LABEL: {{^}}i16_sge_v_s:
; VI: v_cmp_le_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_sge_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -318,7 +318,7 @@ entry:
; GCN-LABEL: {{^}}i16_slt_v_s:
; VI: v_cmp_gt_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_gt_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_slt_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -334,7 +334,7 @@ entry:
; GCN-LABEL: {{^}}i16_sle_v_s:
; VI: v_cmp_ge_i16_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cmp_ge_i32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) #0 {
+define amdgpu_kernel void @i16_sle_v_s(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, i16 %b) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -347,7 +347,4 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 297b5180dfe9bd..32f965f81ca087 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
-define void @main(i1 %arg) #0 {
+define void @main(i1 %arg) "amdgpu-waves-per-eu"="10,10" {
; CHECK-LABEL: main:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -452,14 +452,9 @@ bb50: ; preds = %bb50, %bb48
br label %bb50
}
-declare i64 @llvm.amdgcn.s.getpc() #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
-declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
-
-attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) }
-attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) }
+declare i64 @llvm.amdgcn.s.getpc() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nocallback nofree nosync nounwind willreturn memory(read)
+declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nocallback nofree nosync nounwind willreturn memory(read)
+declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg) nocallback nofree nosync nounwind willreturn memory(write)
+declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) nocallback nofree nosync nounwind willreturn memory(write)
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 96fb7cfeb2775e..48d7d2ea389be9 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -9,12 +9,10 @@
# ERR: error: ran out of registers during register allocation
--- |
- define void @foo() #0 {
+ define void @foo() "amdgpu-waves-per-eu"="8,8" {
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
-
...
# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
index 877dbde94c4d3f..9301f7fe599e08 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -4,7 +4,7 @@
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy
; GCN: ; illegal copy v1 to s9
-define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() nounwind {
%vgpr = call i32 asm sideeffect "; def $0", "=${v1}"()
call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr)
ret void
@@ -12,7 +12,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal VGPR to SGPR copy
; GCN: ; illegal copy v[0:1] to s[10:11]
-define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() nounwind {
%vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"()
call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
ret void
@@ -20,7 +20,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal VGPR to SGPR copy
; GCN: ; illegal copy v[0:3] to s[8:11]
-define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() nounwind {
%vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"()
call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr)
ret void
@@ -28,7 +28,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal VGPR to SGPR copy
; GCN: ; illegal copy v[0:7] to s[8:15]
-define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() nounwind {
%vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"()
call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr)
ret void
@@ -36,7 +36,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal VGPR to SGPR copy
; GCN: ; illegal copy v[0:15] to s[16:31]
-define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
+define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() nounwind {
%vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"()
call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr)
ret void
@@ -45,7 +45,7 @@ define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
; ERR: error: <unknown>:0:0: in function illegal_agpr_to_sgpr_copy_i32 void (): illegal VGPR to SGPR copy
; GCN: v_accvgpr_read_b32 [[COPY1:v[0-9]+]], a1
; GCN: ; illegal copy [[COPY1]] to s9
-define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_i32() #1 {
+define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_i32() nounwind "target-cpu"="gfx908" {
%agpr = call i32 asm sideeffect "; def $0", "=${a1}"()
call void asm sideeffect "; use $0", "${s9}"(i32 %agpr)
ret void
@@ -55,11 +55,8 @@ define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_i32() #1 {
; GCN-DAG: v_accvgpr_read_b32 v[[COPY1L:[0-9]+]], a0
; GCN-DAG: v_accvgpr_read_b32 v[[COPY1H:[0-9]+]], a1
; GCN: ; illegal copy v[[[COPY1L]]:[[COPY1H]]] to s[10:11]
-define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_v2i32() #1 {
+define amdgpu_kernel void @illegal_agpr_to_sgpr_copy_v2i32() nounwind "target-cpu"="gfx908" {
%vgpr = call <2 x i32> asm sideeffect "; def $0", "=${a[0:1]}"()
call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "target-cpu"="gfx908" }
diff --git a/llvm/test/CodeGen/AMDGPU/image-attributes.ll b/llvm/test/CodeGen/AMDGPU/image-attributes.ll
index 29d0f0b08c4d75..d3e57a76c82b48 100644
--- a/llvm/test/CodeGen/AMDGPU/image-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-attributes.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @width_2d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [3 x i32] %0, 0
store i32 %1, ptr addrspace(1) %out
ret void
@@ -24,7 +24,7 @@ define amdgpu_kernel void @width_3d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [3 x i32] %0, 0
store i32 %1, ptr addrspace(1) %out
ret void
@@ -41,7 +41,7 @@ define amdgpu_kernel void @height_2d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [3 x i32] %0, 1
store i32 %1, ptr addrspace(1) %out
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @height_3d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [3 x i32] %0, 1
store i32 %1, ptr addrspace(1) %out
ret void
@@ -71,7 +71,7 @@ define amdgpu_kernel void @depth_3d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [3 x i32] %0, 2
store i32 %1, ptr addrspace(1) %out
ret void
@@ -88,7 +88,7 @@ define amdgpu_kernel void @data_type_2d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [2 x i32] %0, 0
store i32 %1, ptr addrspace(1) %out
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @data_type_3d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [2 x i32] %0, 0
store i32 %1, ptr addrspace(1) %out
ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @channel_order_2d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [2 x i32] %0, 1
store i32 %1, ptr addrspace(1) %out
ret void
@@ -131,7 +131,7 @@ define amdgpu_kernel void @channel_order_3d (ptr addrspace(1) %in,
ptr addrspace(1) %out) {
entry:
%0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
%1 = extractvalue [2 x i32] %0, 1
store i32 %1, ptr addrspace(1) %out
ret void
@@ -152,7 +152,7 @@ define amdgpu_kernel void @image_arg_2nd (ptr addrspace(1) %in1,
ptr addrspace(1) %out) {
entry:
%0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
- ptr addrspace(1) %in2) #0
+ ptr addrspace(1) %in2) readnone
%1 = extractvalue [3 x i32] %0, 1
store i32 %1, ptr addrspace(1) %out
ret void
@@ -161,12 +161,10 @@ entry:
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
-declare [3 x i32] @llvm.OpenCL.image.get.size.2d(ptr addrspace(1)) #0
-declare [3 x i32] @llvm.OpenCL.image.get.size.3d(ptr addrspace(1)) #0
-declare [2 x i32] @llvm.OpenCL.image.get.format.2d(ptr addrspace(1)) #0
-declare [2 x i32] @llvm.OpenCL.image.get.format.3d(ptr addrspace(1)) #0
-
-attributes #0 = { readnone }
+declare [3 x i32] @llvm.OpenCL.image.get.size.2d(ptr addrspace(1)) readnone
+declare [3 x i32] @llvm.OpenCL.image.get.size.3d(ptr addrspace(1)) readnone
+declare [2 x i32] @llvm.OpenCL.image.get.format.2d(ptr addrspace(1)) readnone
+declare [2 x i32] @llvm.OpenCL.image.get.format.3d(ptr addrspace(1)) readnone
!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!0 = !{ptr @width_2d,
diff --git a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
index 83b650e2d755e2..fbac89c357cbe7 100644
--- a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
@@ -661,9 +661,7 @@ define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s)
ret void
}
-declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/image-resource-id.ll b/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
index 08a9dacfc0f5ab..1c2494eab0bb14 100644
--- a/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @test_2d_rd_1_0(ptr addrspace(1) %in, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -25,7 +25,7 @@ define amdgpu_kernel void @test_3d_rd_1_0(ptr addrspace(1) %in, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_2d_wr_1_0(ptr addrspace(1) %in, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -55,7 +55,7 @@ define amdgpu_kernel void @test_3d_wr_1_0(ptr addrspace(1) %in, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in) #0
+ ptr addrspace(1) %in) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -72,7 +72,7 @@ define amdgpu_kernel void @test_2d_rd_2_0(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in1) #0
+ ptr addrspace(1) %in1) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -87,7 +87,7 @@ define amdgpu_kernel void @test_2d_rd_2_1(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in2) #0
+ ptr addrspace(1) %in2) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_3d_rd_2_0(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in1) #0
+ ptr addrspace(1) %in1) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -117,7 +117,7 @@ define amdgpu_kernel void @test_3d_rd_2_1(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in2) #0
+ ptr addrspace(1) %in2) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -134,7 +134,7 @@ define amdgpu_kernel void @test_2d_wr_2_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in1) #0
+ ptr addrspace(1) %in1) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -149,7 +149,7 @@ define amdgpu_kernel void @test_2d_wr_2_1(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in2) #0
+ ptr addrspace(1) %in2) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -164,7 +164,7 @@ define amdgpu_kernel void @test_3d_wr_2_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in1) #0
+ ptr addrspace(1) %in1) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -179,7 +179,7 @@ define amdgpu_kernel void @test_3d_wr_2_1(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in2) #0
+ ptr addrspace(1) %in2) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -197,7 +197,7 @@ define amdgpu_kernel void @test_2d_rd_3_0(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -214,7 +214,7 @@ define amdgpu_kernel void @test_3d_rd_3_0(ptr addrspace(1) %in1, ; read_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -232,7 +232,7 @@ define amdgpu_kernel void @test_2d_wr_3_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -249,7 +249,7 @@ define amdgpu_kernel void @test_3d_wr_3_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -267,7 +267,7 @@ define amdgpu_kernel void @test_2d_mix_3_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -283,7 +283,7 @@ define amdgpu_kernel void @test_3d_mix_3_0(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -299,7 +299,7 @@ define amdgpu_kernel void @test_2d_mix_3_1(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -315,7 +315,7 @@ define amdgpu_kernel void @test_3d_mix_3_1(ptr addrspace(1) %in1, ; write_only
ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
- ptr addrspace(1) %in3) #0
+ ptr addrspace(1) %in3) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -324,10 +324,8 @@ entry:
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
-declare i32 @llvm.OpenCL.image.get.resource.id.2d(ptr addrspace(1)) #0
-declare i32 @llvm.OpenCL.image.get.resource.id.3d(ptr addrspace(1)) #0
-
-attributes #0 = { readnone }
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(ptr addrspace(1)) readnone
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(ptr addrspace(1)) readnone
!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13,
!14, !15, !16, !17, !18, !19}
diff --git a/llvm/test/CodeGen/AMDGPU/image-schedule.ll b/llvm/test/CodeGen/AMDGPU/image-schedule.ll
index dbd9efc58e59d1..07b693b041bd2e 100644
--- a/llvm/test/CodeGen/AMDGPU/image-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-schedule.ll
@@ -10,9 +10,9 @@
; GCN: image_load
; GCN: image_store
-define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <3 x i32> inreg %arg3, i32 inreg %arg4, <3 x i32> %arg5) local_unnamed_addr #0 {
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <3 x i32> inreg %arg3, i32 inreg %arg4, <3 x i32> %arg5) local_unnamed_addr nounwind {
.entry:
- %tmp = call i64 @llvm.amdgcn.s.getpc() #1
+ %tmp = call i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
%tmp6 = bitcast i64 %tmp to <2 x i32>
%.0.vec.insert = insertelement <2 x i32> undef, i32 %arg2, i32 0
%.4.vec.insert = shufflevector <2 x i32> %.0.vec.insert, <2 x i32> %tmp6, <2 x i32> <i32 0, i32 3>
@@ -23,30 +23,25 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
%tmp12 = load <8 x i32>, ptr addrspace(4) %tmp10, align 16
%tmp13.0 = extractelement <3 x i32> %tmp9, i32 0
%tmp13.1 = extractelement <3 x i32> %tmp9, i32 1
- %tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0
+ %tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) nounwind
%tmp15 = inttoptr i64 %tmp7 to ptr addrspace(4)
%tmp16 = load <8 x i32>, ptr addrspace(4) %tmp15, align 16
- call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0
+ call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) nounwind
%tmp17 = load <8 x i32>, ptr addrspace(4) %tmp15, align 16
- %tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 165, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0
+ %tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 165, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) nounwind
%tmp19 = getelementptr [4294967295 x i8], ptr addrspace(4) %tmp8, i64 0, i64 64
%tmp21 = load <8 x i32>, ptr addrspace(4) %tmp19, align 16
- call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0
+ call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) nounwind
ret void
}
; Function Attrs: nounwind readnone speculatable
-declare i64 @llvm.amdgcn.s.getpc() #1
+declare i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
; Function Attrs: nounwind writeonly
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind writeonly }
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind writeonly
!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
index 9927f4d166badc..1d179920f72b7f 100644
--- a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
+++ b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
@@ -8,15 +8,13 @@
; GCN-LABEL: name: _amdgpu_cs_main
; GCN-LABEL: bb.0
; GCN: IMAGE_LOAD_V4_V2
-define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr #0 {
+define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr nounwind {
.entry:
%unused.result = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 0) #3
- call void asm sideeffect ";", "" () #0
+ call void asm sideeffect ";", "" () nounwind
ret void
}
; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll
index f7a0e296fa1733..52efa75119ae6e 100644
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -1842,10 +1842,8 @@ define amdgpu_vs void @literal_folding(float %arg) {
main_body:
%tmp = fmul float %arg, 0x3FE86A7F00000000
%tmp1 = fmul float %arg, 0xBFE86A7F00000000
- call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) nounwind
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index ae51c3edf1c7e7..30f9215e95dfc9 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -8,7 +8,7 @@
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(ptr addrspace(1) %out) nounwind {
store <2 x i16> <i16 -32768, i16 -32768>, ptr addrspace(1) %out
ret void
}
@@ -16,7 +16,7 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(ptr addrspace(1) %out)
; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_0.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_0.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 0.0, half 0.0>, ptr addrspace(1) %out
ret void
}
@@ -24,7 +24,7 @@ define amdgpu_kernel void @store_inline_imm_0.0_v2f16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_imm_neg_0.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_imm_neg_0.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half -0.0, half -0.0>, ptr addrspace(1) %out
ret void
}
@@ -32,7 +32,7 @@ define amdgpu_kernel void @store_imm_neg_0.0_v2f16(ptr addrspace(1) %out) #0 {
; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_0.5_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_0.5_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 0.5, half 0.5>, ptr addrspace(1) %out
ret void
}
@@ -40,7 +40,7 @@ define amdgpu_kernel void @store_inline_imm_0.5_v2f16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half -0.5, half -0.5>, ptr addrspace(1) %out
ret void
}
@@ -48,7 +48,7 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_1.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_1.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 1.0, half 1.0>, ptr addrspace(1) %out
ret void
}
@@ -56,7 +56,7 @@ define amdgpu_kernel void @store_inline_imm_1.0_v2f16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half -1.0, half -1.0>, ptr addrspace(1) %out
ret void
}
@@ -64,7 +64,7 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_2.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_2.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 2.0, half 2.0>, ptr addrspace(1) %out
ret void
}
@@ -72,7 +72,7 @@ define amdgpu_kernel void @store_inline_imm_2.0_v2f16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half -2.0, half -2.0>, ptr addrspace(1) %out
ret void
}
@@ -80,7 +80,7 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_4.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_4.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 4.0, half 4.0>, ptr addrspace(1) %out
ret void
}
@@ -88,7 +88,7 @@ define amdgpu_kernel void @store_inline_imm_4.0_v2f16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half -4.0, half -4.0>, ptr addrspace(1) %out
ret void
}
@@ -96,7 +96,7 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 0xH3118, half 0xH3118>, ptr addrspace(1) %out
ret void
}
@@ -104,7 +104,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(ptr addrspace(1) %out)
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 0xHB118, half 0xHB118>, ptr addrspace(1) %out
ret void
}
@@ -112,7 +112,7 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(ptr addrspace(1) %ou
; GCN-LABEL: {{^}}store_literal_imm_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00
; GCN: buffer_store_{{dword|b32}} [[REG]]
-define amdgpu_kernel void @store_literal_imm_v2f16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @store_literal_imm_v2f16(ptr addrspace(1) %out) nounwind {
store <2 x half> <half 4096.0, half 4096.0>, ptr addrspace(1) %out
ret void
}
@@ -132,7 +132,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(ptr addrspace(1) %out) #0 {
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_0.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_0.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0.0, half 0.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -157,7 +157,7 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(ptr addrspace(1) %out, <2 x
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0.5
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0.5, half 0.5>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -182,7 +182,7 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, <2 x
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -0.5
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half -0.5, half -0.5>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -207,7 +207,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(ptr addrspace(1) %out, <
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 1.0, half 1.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -233,7 +233,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(ptr addrspace(1) %out, <2 x
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -1.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half -1.0, half -1.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -258,7 +258,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(ptr addrspace(1) %out, <
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 2.0, half 2.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -283,7 +283,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(ptr addrspace(1) %out, <2 x
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -2.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half -2.0, half -2.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -308,7 +308,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(ptr addrspace(1) %out, <
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 4.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 4.0, half 4.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -333,7 +333,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(ptr addrspace(1) %out, <2 x
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -4.0
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half -4.0, half -4.0>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -355,7 +355,7 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(ptr addrspace(1) %out, <
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load <2 x half>, ptr addrspace(1) %in
%y = fadd <2 x half> %x, <half 0.5, half 0.5>
store <2 x half> %y, ptr addrspace(1) %out
@@ -378,7 +378,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(ptr addrspace(1) %ou
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
-define amdgpu_kernel void @commute_add_literal_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @commute_add_literal_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load <2 x half>, ptr addrspace(1) %in
%y = fadd <2 x half> %x, <half 1024.0, half 1024.0>
store <2 x half> %y, ptr addrspace(1) %out
@@ -404,7 +404,7 @@ define amdgpu_kernel void @commute_add_literal_v2f16(ptr addrspace(1) %out, ptr
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -430,7 +430,7 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(ptr addrspace(1) %out, <2 x ha
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -456,7 +456,7 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(ptr addrspace(1) %out, <2 x ha
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -475,7 +475,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(ptr addrspace(1) %out, <2 x h
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1 ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%xbc = bitcast <2 x half> %x to i32
%y = add i32 %xbc, -1
%ybc = bitcast i32 %y to <2 x half>
@@ -496,7 +496,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(ptr addrspace(1) %out, <2
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%xbc = bitcast <2 x half> %x to i32
%y = add i32 %xbc, 4294901758 ; 0xfffefffe
%ybc = bitcast i32 %y to <2 x half>
@@ -518,7 +518,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(ptr addrspace(1) %out, <2
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0 ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
-define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%xbc = bitcast <2 x half> %x to i32
%y = add i32 %xbc, 4293984240 ; 0xfff0fff0
%ybc = bitcast i32 %y to <2 x half>
@@ -545,7 +545,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(ptr addrspace(1) %out, <2
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 63
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_63_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_63_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -570,7 +570,7 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(ptr addrspace(1) %out, <2 x h
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 64
; VI: v_or_b32
; VI: buffer_store_dword
-define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
+define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x half> %x) nounwind {
%y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
store <2 x half> %y, ptr addrspace(1) %out
ret void
@@ -664,5 +664,3 @@ define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
index e37b6ff10ffa96..585f4e7d21c34a 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
@@ -2,7 +2,7 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine,infer-alignment %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_local_size_x(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
@@ -22,7 +22,7 @@ define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_local_size_y(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
@@ -43,7 +43,7 @@ define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_local_size_z(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
@@ -64,7 +64,7 @@ define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_remainder_x(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_remainder_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_remainder_x(
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @get_remainder_x(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_remainder_y(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_remainder_y(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_remainder_y(
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -90,7 +90,7 @@ define amdgpu_kernel void @get_remainder_y(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_remainder_z(
; GCN-NEXT: store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -103,7 +103,7 @@ define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_work_group_size_x(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_X:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
@@ -119,7 +119,7 @@ define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_work_group_size_y(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
@@ -135,7 +135,7 @@ define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
; GCN-LABEL: @get_work_group_size_z(
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT: [[GEP_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
@@ -151,7 +151,7 @@ define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_x_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @get_work_group_size_x_reqd(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_x_reqd(
; GCN-NEXT: store i16 8, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -164,7 +164,7 @@ define amdgpu_kernel void @get_work_group_size_x_reqd(ptr addrspace(1) %out) #0
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_y_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @get_work_group_size_y_reqd(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_y_reqd(
; GCN-NEXT: store i16 16, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -177,7 +177,7 @@ define amdgpu_kernel void @get_work_group_size_y_reqd(ptr addrspace(1) %out) #0
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_z_reqd(
; GCN-NEXT: store i16 2, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT: ret void
@@ -190,14 +190,11 @@ define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) #0
}
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
!llvm.module.flags = !{!1}
-
-attributes #0 = { nounwind "uniform-work-group-size"="true" }
-attributes #1 = { nounwind readnone speculatable }
!0 = !{i32 8, i32 16, i32 2}
!1 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index 4c21f872974559..0173360deb8e85 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -3,22 +3,22 @@
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workgroup.id.y() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workgroup.id.z() nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-declare i32 @llvm.amdgcn.lds.kernel.id() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.y() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.z() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.lds.kernel.id() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.amdgcn.dispatch.id() nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nocallback nofree nosync nounwind speculatable willreturn memory(none)
; Avoid adding all of these to the output attribute sets
define void @use_everything_else() {
@@ -249,9 +249,6 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr
ret void
}
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
index 31fa32b3475cb7..2133554a5daf0f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
@@ -18,7 +18,7 @@
; GCN-COUNT-32: v_cndmask_b32
; GCN-COUNT-4: buffer_store_dwordx4
-define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) #0 {
+define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) nounwind {
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
%id.ext = zext i32 %id to i64
@@ -53,7 +53,7 @@ bb2:
; GCN-NEXT: v_mov_b32_e32
; GCN-NOT: v_mov_b32_e32
; GCN-NEXT: s_set_gpr_idx_off
-define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %out1, i32 %in) #0 {
+define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %out1, i32 %in) nounwind {
entry:
%add1 = add i32 %in, 1
%ins1 = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %add1
@@ -75,7 +75,7 @@ declare hidden void @foo()
; GCN-NEXT: v_mov_b32_e32 {{v[0-9]+}}, 8
; GCN-NEXT: s_set_gpr_idx_off
; GCN: s_swappc_b64
-define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %idx) #0 {
+define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %idx) nounwind {
%vec = load <16 x i32>, ptr addrspace(1) %ptr
%i6 = insertelement <16 x i32> %vec, i32 8, i32 %idx
call void @foo()
@@ -85,5 +85,3 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
index cbb5d9e1692843..072ed256629a97 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
@@ -23,9 +23,9 @@
; GCN-COUNT-32: v_cndmask_b32
; GCN-COUNT-4: buffer_store_dwordx4
-define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) #0 {
+define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) nounwind {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%id.ext = zext i32 %id to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %id.ext
%idx0 = load volatile i32, ptr addrspace(1) %gep
@@ -45,9 +45,5 @@ bb2:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind convergent }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 1f92427fe8a237..17877b1574821a 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -137,7 +137,7 @@ entry:
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%index = add i32 %id, -512
%value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
store i32 %value, ptr addrspace(1) %out
@@ -293,7 +293,7 @@ entry:
; GCN-COUNT-4: buffer_store_dwordx4
define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%index = add i32 %id, -512
%value = insertelement <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 33, i32 %index
store <16 x i32> %value, ptr addrspace(1) %out
@@ -307,7 +307,7 @@ entry:
; GCN-COUNT-4: buffer_store_dwordx4
define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%index = add i32 %id, -16
%value = insertelement <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 500, i32 %index
store <16 x i32> %value, ptr addrspace(1) %out
@@ -326,9 +326,9 @@ entry:
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
-define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) nounwind {
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%id.ext = zext i32 %id to i64
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %id.ext
%idx0 = load volatile i32, ptr addrspace(1) %gep
@@ -355,7 +355,7 @@ bb2:
; GCN-LABEL: {{^}}insert_adjacent_blocks:
-define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) #0 {
+define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) nounwind {
bb:
%tmp = icmp eq i32 %arg, 0
br i1 %tmp, label %bb1, label %bb4
@@ -363,13 +363,13 @@ bb:
bb1: ; preds = %bb
%tmp2 = load volatile <4 x float>, ptr addrspace(1) undef
%tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
- call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) nounwind ; Prevent block optimize out
br label %bb7
bb4: ; preds = %bb
%tmp5 = load volatile <4 x float>, ptr addrspace(1) undef
%tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
- call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) nounwind ; Prevent block optimize out
br label %bb7
bb7: ; preds = %bb4, %bb1
@@ -404,7 +404,7 @@ bb7: ; preds = %bb4, %bb1
; GCN: ds_write_b32
; GCN: ds_write_b32
; GCN: s_endpgm
-define amdgpu_kernel void @multi_same_block(i32 %arg) #0 {
+define amdgpu_kernel void @multi_same_block(i32 %arg) nounwind {
bb:
%tmp1 = add i32 %arg, -16
%tmp2 = insertelement <9 x float> <float 1.700000e+01, float 1.800000e+01, float 1.900000e+01, float 2.000000e+01, float 2.100000e+01, float 2.200000e+01, float 2.300000e+01, float 2.400000e+01, float 2.500000e+01>, float 4.000000e+00, i32 %tmp1
@@ -523,7 +523,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(ptr addrspace(1) %out,
; GCN: s_mov_b64 exec,
; GCN: s_cbranch_execnz [[BB2]]
-define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
+define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) nounwind {
bb:
br label %bb2
@@ -543,9 +543,5 @@ bb8: ; preds = %bb2
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind convergent }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
index 40cb061f61ab4b..f9d1a5ca1c5c5b 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+promote-alloca -disable-promote-alloca-to-vector -verify-machineinstrs < %s | FileCheck --check-prefixes=CI-PROMOTE,SI %s
-declare void @llvm.amdgcn.s.barrier() #0
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
; SI-LABEL: {{^}}private_access_f64_alloca:
@@ -20,7 +20,7 @@ declare void @llvm.amdgcn.s.barrier() #0
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b64
; CI-PROMOTE: ds_read_b64
-define amdgpu_kernel void @private_access_f64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) #1 {
+define amdgpu_kernel void @private_access_f64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" {
%val = load double, ptr addrspace(1) %in, align 8
%array = alloca [8 x double], align 8, addrspace(5)
%ptr = getelementptr inbounds [8 x double], ptr addrspace(5) %array, i32 0, i32 %b
@@ -51,7 +51,7 @@ define amdgpu_kernel void @private_access_f64_alloca(ptr addrspace(1) noalias %o
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b128
; CI-PROMOTE: ds_read_b128
-define amdgpu_kernel void @private_access_v2f64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) #1 {
+define amdgpu_kernel void @private_access_v2f64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" {
%val = load <2 x double>, ptr addrspace(1) %in, align 16
%array = alloca [4 x <2 x double>], align 16, addrspace(5)
%ptr = getelementptr inbounds [4 x <2 x double>], ptr addrspace(5) %array, i32 0, i32 %b
@@ -77,7 +77,7 @@ define amdgpu_kernel void @private_access_v2f64_alloca(ptr addrspace(1) noalias
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b64
; CI-PROMOTE: ds_read_b64
-define amdgpu_kernel void @private_access_i64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) #1 {
+define amdgpu_kernel void @private_access_i64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" {
%val = load i64, ptr addrspace(1) %in, align 8
%array = alloca [8 x i64], align 8, addrspace(5)
%ptr = getelementptr inbounds [8 x i64], ptr addrspace(5) %array, i32 0, i32 %b
@@ -109,7 +109,7 @@ define amdgpu_kernel void @private_access_i64_alloca(ptr addrspace(1) noalias %o
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write_b128
; CI-PROMOTE: ds_read_b128
-define amdgpu_kernel void @private_access_v2i64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) #1 {
+define amdgpu_kernel void @private_access_v2i64_alloca(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" {
%val = load <2 x i64>, ptr addrspace(1) %in, align 16
%array = alloca [4 x <2 x i64>], align 16, addrspace(5)
%ptr = getelementptr inbounds [4 x <2 x i64>], ptr addrspace(5) %array, i32 0, i32 %b
@@ -119,6 +119,3 @@ define amdgpu_kernel void @private_access_v2i64_alloca(ptr addrspace(1) noalias
store <2 x i64> %result, ptr addrspace(1) %out, align 16
ret void
}
-
-attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 7864564d289178..86654b1c220c69 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -3,13 +3,11 @@
--- |
- define void @main() #0 {
+ define void @main() "amdgpu-waves-per-eu"="10,10" {
bb:
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-
...
---
name: main
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
index 1030cdb1b43fc1..ba7b8d70304d71 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -1,12 +1,10 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,0 -simplify-mir -o - %s | FileCheck %s
--- |
- define void @main() #0 {
+ define void @main() "amdgpu-waves-per-eu"="10,10" {
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-
...
---
name: main
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm-reserved-regs.ll b/llvm/test/CodeGen/AMDGPU/inline-asm-reserved-regs.ll
index f20d720c3876ba..0e621347994e96 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm-reserved-regs.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm-reserved-regs.ll
@@ -2,7 +2,7 @@
; ERR: warning: inline asm clobber list contains reserved registers: v42
; ERR: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
-define amdgpu_kernel void @clobber_occupancy_limited_vgpr() #0 {
+define amdgpu_kernel void @clobber_occupancy_limited_vgpr() "amdgpu-waves-per-eu"="10,10" {
entry:
call void asm sideeffect "; clobber $0", "~{v42}"()
ret void
@@ -10,7 +10,7 @@ entry:
; ERR: warning: inline asm clobber list contains reserved registers: v[42:43]
; ERR: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
-define amdgpu_kernel void @clobber_occupancy_limited_vgpr64() #0 {
+define amdgpu_kernel void @clobber_occupancy_limited_vgpr64() "amdgpu-waves-per-eu"="10,10" {
entry:
call void asm sideeffect "; clobber $0", "~{v[42:43]}"()
ret void
@@ -48,5 +48,3 @@ entry:
store i64 %exec, ptr addrspace(1) %ptr
ret void
}
-
-attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/inline-maxbb.ll b/llvm/test/CodeGen/AMDGPU/inline-maxbb.ll
index cc9203a5e984f0..c6b63c088f8c4b 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-maxbb.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-maxbb.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @caller(i32 %x) {
; inlinehint
-define i32 @callee_hint(i32 %x) #0 {
+define i32 @callee_hint(i32 %x) inlinehint {
entry:
%cc = icmp eq i32 %x, 1
br i1 %cc, label %ret_res, label %mulx
@@ -66,5 +66,3 @@ define amdgpu_kernel void @caller_hint(i32 %x) {
store volatile i32 %res, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { inlinehint }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
index 5bd116d4a26983..86d7d3f9be71dc 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
@@ -5,17 +5,17 @@
; GCN-LABEL: {{^}}s_input_output_i16:
; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
; GCN: ; use s[[REG]]
-define amdgpu_kernel void @s_input_output_i16() #0 {
+define amdgpu_kernel void @s_input_output_i16() nounwind {
%v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
+ tail call void asm sideeffect "; use $0", "s"(i16 %v) nounwind
ret void
}
; GCN-LABEL: {{^}}v_input_output_i16:
; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
; GCN: ; use v[[REG]]
-define amdgpu_kernel void @v_input_output_i16() #0 {
- %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+define amdgpu_kernel void @v_input_output_i16() nounwind {
+ %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() nounwind
tail call void asm sideeffect "; use $0", "v"(i16 %v)
ret void
}
@@ -23,8 +23,8 @@ define amdgpu_kernel void @v_input_output_i16() #0 {
; GCN-LABEL: {{^}}s_input_output_f16:
; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
; GCN: ; use s[[REG]]
-define amdgpu_kernel void @s_input_output_f16() #0 {
- %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
+define amdgpu_kernel void @s_input_output_f16() nounwind {
+ %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() nounwind
tail call void asm sideeffect "; use $0", "s"(half %v)
ret void
}
@@ -32,10 +32,8 @@ define amdgpu_kernel void @s_input_output_f16() #0 {
; GCN-LABEL: {{^}}v_input_output_f16:
; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
; GCN: ; use v[[REG]]
-define amdgpu_kernel void @v_input_output_f16() #0 {
- %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
+define amdgpu_kernel void @v_input_output_f16() nounwind {
+ %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() nounwind
tail call void asm sideeffect "; use $0", "v"(half %v)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll
index 46b2eb30c791c7..8721bf2c4316bd 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll
@@ -2,56 +2,54 @@
; GCN-LABEL: {{^}}inline_asm_input_v2i16:
; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_input_v2i16(ptr addrspace(1) %out, <2 x i16> %in) #0 {
+define amdgpu_kernel void @inline_asm_input_v2i16(ptr addrspace(1) %out, <2 x i16> %in) nounwind {
entry:
- %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x i16> %in) #0
+ %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x i16> %in) nounwind
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}inline_asm_input_v2f16:
; GCN: s_mov_b32 s0, s{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_input_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @inline_asm_input_v2f16(ptr addrspace(1) %out, <2 x half> %in) nounwind {
entry:
- %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x half> %in) #0
+ %val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x half> %in) nounwind
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}inline_asm_output_v2i16:
; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_output_v2i16(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @inline_asm_output_v2i16(ptr addrspace(1) %out, i32 %in) nounwind {
entry:
- %val = call <2 x i16> asm "s_mov_b32 $0, $1", "=r,r"(i32 %in) #0
+ %val = call <2 x i16> asm "s_mov_b32 $0, $1", "=r,r"(i32 %in) nounwind
store <2 x i16> %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}inline_asm_output_v2f16:
; GCN: v_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_output_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @inline_asm_output_v2f16(ptr addrspace(1) %out, i32 %in) nounwind {
entry:
- %val = call <2 x half> asm "v_mov_b32 $0, $1", "=v,r"(i32 %in) #0
+ %val = call <2 x half> asm "v_mov_b32 $0, $1", "=v,r"(i32 %in) nounwind
store <2 x half> %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}inline_asm_packed_v2i16:
; GCN: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_packed_v2i16(ptr addrspace(1) %out, <2 x i16> %in0, <2 x i16> %in1) #0 {
+define amdgpu_kernel void @inline_asm_packed_v2i16(ptr addrspace(1) %out, <2 x i16> %in0, <2 x i16> %in1) nounwind {
entry:
- %val = call <2 x i16> asm "v_pk_add_u16 $0, $1, $2", "=v,r,v"(<2 x i16> %in0, <2 x i16> %in1) #0
+ %val = call <2 x i16> asm "v_pk_add_u16 $0, $1, $2", "=v,r,v"(<2 x i16> %in0, <2 x i16> %in1) nounwind
store <2 x i16> %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}inline_asm_packed_v2f16:
; GCN: v_pk_add_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @inline_asm_packed_v2f16(ptr addrspace(1) %out, <2 x half> %in0, <2 x half> %in1) #0 {
+define amdgpu_kernel void @inline_asm_packed_v2f16(ptr addrspace(1) %out, <2 x half> %in0, <2 x half> %in1) nounwind {
entry:
- %val = call <2 x half> asm "v_pk_add_f16 $0, $1, $2", "=v,r,v"(<2 x half> %in0, <2 x half> %in1) #0
+ %val = call <2 x half> asm "v_pk_add_f16 $0, $1, $2", "=v,r,v"(<2 x half> %in0, <2 x half> %in1) nounwind
store <2 x half> %val, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll
index 24bd8b4d774343..a979d72d79b7ab 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll
@@ -111,5 +111,3 @@ define amdgpu_kernel void @v_input_output_v32i16() {
tail call void asm sideeffect "; use $0", "v"(<32 x i16> %v)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index cddfb21a6fbdf4..36cac63b21bdc4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -36,7 +36,7 @@ bb:
ret void
}
-define <2 x i64> @f1() #0 {
+define <2 x i64> @f1() noinline optnone {
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -245,7 +245,5 @@ bb43:
unreachable
}
-attributes #0 = { noinline optnone }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir
index 0ffed0ae4bfd59..ca2abce9cdc7b5 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir
@@ -1,10 +1,8 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s
--- |
- define float @entry_callee_wait(float %arg) #0 {
+ define float @entry_callee_wait(float %arg) nounwind {
ret float %arg
}
-
- attributes #0 = { nounwind }
...
---
# CHECK-LABEL: name: entry_callee_wait{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
index a142bddbae69a8..5ae91ac87b9353 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
@@ -1,7 +1,7 @@
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s
--- |
define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x
- i32> inreg, i32 inreg %w, float %v) #0 {
+ i32> inreg, i32 inreg %w, float %v) nounwind {
%a = load volatile float, ptr addrspace(1) undef
%b = load volatile float, ptr addrspace(1) undef
%c = load volatile float, ptr addrspace(1) undef
@@ -10,9 +10,7 @@
ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
}
- declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
- attributes #0 = { nounwind }
+ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 68427e8937bb94..031a6980018646 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -2273,7 +2273,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4f64(ptr addrspace(1) %out, <4
ret void
}
-define amdgpu_kernel void @dynamic_insertelement_v8f64(ptr addrspace(1) %out, <8 x double> %a, i32 %b) #0 {
+define amdgpu_kernel void @dynamic_insertelement_v8f64(ptr addrspace(1) %out, <8 x double> %a, i32 %b) nounwind {
; SI-LABEL: dynamic_insertelement_v8f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0x20
@@ -2348,7 +2348,4 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(ptr addrspace(1) %out, <8
ret void
}
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 1ba2491d2210ec..9f21b3c567497b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -4,7 +4,7 @@
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CIVI,CI %s
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
-define amdgpu_kernel void @s_insertelement_v2i16_0(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -51,7 +51,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_0(ptr addrspace(1) %out, ptr ad
}
-define amdgpu_kernel void @s_insertelement_v2i16_0_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -118,7 +118,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reg(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0_multi_use_hi_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -200,11 +200,11 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_multi_use_hi_reg(ptr addrspac
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, ptr addrspace(1) %out
%use1 = zext i16 %elt1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %use1) #0
+ call void asm sideeffect "; use $0", "s"(i32 %use1) nounwind
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %elt.arg) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i32 %elt.arg) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0_reghi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -271,7 +271,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %elt.arg) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %elt.arg) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0_reghi_multi_use_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -352,11 +352,11 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_multi_use_1(ptr addrspa
%vecins = insertelement <2 x i16> %vec, i16 %elt, i32 0
store <2 x i16> %vecins, ptr addrspace(1) %out
%use1 = zext i16 %elt to i32
- call void asm sideeffect "; use $0", "s"(i32 %use1) #0
+ call void asm sideeffect "; use $0", "s"(i32 %use1) nounwind
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %elt.arg) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, i32 %elt.arg) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_0_reghi_both_multi_use_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -454,12 +454,12 @@ define amdgpu_kernel void @s_insertelement_v2i16_0_reghi_both_multi_use_1(ptr ad
%use1 = zext i16 %elt to i32
%vec.hi.use1 = zext i16 %vec.hi to i32
- call void asm sideeffect "; use $0", "s"(i32 %use1) #0
- call void asm sideeffect "; use $0", "s"(i32 %vec.hi.use1) #0
+ call void asm sideeffect "; use $0", "s"(i32 %use1) nounwind
+ call void asm sideeffect "; use $0", "s"(i32 %vec.hi.use1) nounwind
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -505,7 +505,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_1(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_insertelement_v2i16_1_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_1_reg(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, [8 x i32], i16 %elt) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_1_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -572,7 +572,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_1_reg(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @s_insertelement_v2f16_0(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @s_insertelement_v2f16_0(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
; GFX9-LABEL: s_insertelement_v2f16_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -620,7 +620,7 @@ define amdgpu_kernel void @s_insertelement_v2f16_0(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_insertelement_v2f16_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) #0 {
+define amdgpu_kernel void @s_insertelement_v2f16_1(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr) nounwind {
; GFX9-LABEL: s_insertelement_v2f16_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -666,7 +666,7 @@ define amdgpu_kernel void @s_insertelement_v2f16_1(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v2i16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -729,7 +729,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -739,7 +739,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %elt.arg) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %elt.arg) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_0_reghi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -804,7 +804,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -816,7 +816,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_0_inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -877,7 +877,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -888,7 +888,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(ptr addrspace(1) %o
}
; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0
-define amdgpu_kernel void @v_insertelement_v2i16_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -951,7 +951,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -961,7 +961,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_1_inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1022,7 +1022,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1032,7 +1032,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_insertelement_v2f16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2f16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2f16_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1095,7 +1095,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1105,7 +1105,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2f16_0_inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1166,7 +1166,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1176,7 +1176,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @v_insertelement_v2f16_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2f16_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2f16_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1239,7 +1239,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1249,7 +1249,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_insertelement_v2f16_1_inlineimm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1321,7 +1321,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(ptr addrspace(1) %o
}
; FIXME: Enable for others when argument load not split
-define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(4) %idx.ptr) #0 {
+define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out, ptr addrspace(4) %vec.ptr, ptr addrspace(4) %idx.ptr) nounwind {
; GFX9-LABEL: s_insertelement_v2i16_dynamic:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
@@ -1406,7 +1406,7 @@ define amdgpu_kernel void @s_insertelement_v2i16_dynamic(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) #0 {
+define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) nounwind {
; GFX9-LABEL: v_insertelement_v2i16_dynamic_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1480,7 +1480,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(ptr addrspace(1) %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1490,7 +1490,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) #0 {
+define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) nounwind {
; GFX9-LABEL: v_insertelement_v2f16_dynamic_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
@@ -1582,7 +1582,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(ptr addrspace(1) %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i64 %tid.ext
%idx.gep = getelementptr inbounds i32, ptr addrspace(1) %idx.ptr, i64 %tid.ext
@@ -1594,7 +1594,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_0(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_0(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4f16_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1661,7 +1661,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_0(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1673,7 +1673,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_0(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_1(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_1(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4f16_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1739,7 +1739,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_1(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1751,7 +1751,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_1(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4f16_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1818,7 +1818,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_2(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1830,7 +1830,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_2(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_3(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_3(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4f16_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1896,7 +1896,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_3(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1908,7 +1908,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_3(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_insertelement_v4i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4i16_2(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4i16_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1975,7 +1975,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_2(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -1988,7 +1988,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_2(ptr addrspace(1) %out, ptr ad
}
; FIXME: Better code on CI?
-define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) #0 {
+define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val) nounwind {
; GFX9-LABEL: v_insertelement_v4i16_dynamic_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2083,7 +2083,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(ptr addrspace(1) %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2096,7 +2096,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idxval) #0 {
+define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idxval) nounwind {
; GFX9-LABEL: v_insertelement_v4f16_dynamic_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2184,7 +2184,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(ptr addrspace(1) %
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <4 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2263,7 +2263,7 @@ define amdgpu_kernel void @v_insertelement_v8f16_3(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <8 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2343,7 +2343,7 @@ define amdgpu_kernel void @v_insertelement_v8i16_6(ptr addrspace(1) %out, ptr ad
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <8 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2570,7 +2570,7 @@ define amdgpu_kernel void @v_insertelement_v8f16_dynamic(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <8 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <8 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2671,7 +2671,7 @@ define amdgpu_kernel void @v_insertelement_v16f16_3(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <16 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -2772,7 +2772,7 @@ define amdgpu_kernel void @v_insertelement_v16i16_6(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <16 x i16>, ptr addrspace(1) %out, i64 %tid.ext
@@ -3173,7 +3173,7 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <16 x half>, ptr addrspace(1) %in, i64 %tid.ext
%out.gep = getelementptr inbounds <16 x half>, ptr addrspace(1) %out, i64 %tid.ext
@@ -3186,7 +3186,4 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
index 80ed8318d8abeb..c5fe472919c5ca 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
@@ -18,8 +18,8 @@
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%idx.gep = getelementptr inbounds i32, ptr addrspace(1) %idx.ptr, i64 %tid.ext
@@ -32,7 +32,4 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
index c63fe3d6d2a37a..2ec06f7e3763ce 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
@@ -16,8 +16,8 @@
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %idx.ptr) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
%idx.gep = getelementptr inbounds i32, ptr addrspace(1) %idx.ptr, i64 %tid.ext
@@ -30,7 +30,4 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/internalize.ll b/llvm/test/CodeGen/AMDGPU/internalize.ll
index 6b2a4d5fc328b4..2543fe7dcc6770 100644
--- a/llvm/test/CodeGen/AMDGPU/internalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/internalize.ll
@@ -12,7 +12,7 @@
; OPT: define internal fastcc void @func_used_noinline(
; OPT-NONE: define fastcc void @func_used_noinline(
-define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 {
+define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) noinline nounwind {
entry:
store volatile i32 %tid, ptr addrspace(1) %out
ret void
@@ -20,7 +20,7 @@ entry:
; OPTNONE: define fastcc void @func_used_alwaysinline(
; OPT-NOT: @func_used_alwaysinline
-define fastcc void @func_used_alwaysinline(ptr addrspace(1) %out, i32 %tid) #2 {
+define fastcc void @func_used_alwaysinline(ptr addrspace(1) %out, i32 %tid) alwaysinline nounwind {
entry:
store volatile i32 %tid, ptr addrspace(1) %out
ret void
@@ -28,14 +28,14 @@ entry:
; OPTNONE: define void @func_unused(
; OPT-NOT: @func_unused
-define void @func_unused(ptr addrspace(1) %out, i32 %tid) #1 {
+define void @func_unused(ptr addrspace(1) %out, i32 %tid) noinline nounwind {
entry:
store volatile i32 %tid, ptr addrspace(1) %out
ret void
}
; ALL: define amdgpu_kernel void @kernel_unused(
-define amdgpu_kernel void @kernel_unused(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @kernel_unused(ptr addrspace(1) %out) noinline nounwind {
entry:
store volatile i32 1, ptr addrspace(1) %out
ret void
@@ -54,8 +54,4 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { noinline nounwind }
-attributes #2 = { alwaysinline nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
index 6ad2ed3ff63d10..f1bc9ada1ba611 100644
--- a/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
@@ -10,7 +10,7 @@
; GCN-DAG: buffer_load_dwordx2 [[PTR:v\[[0-9]+:[0-9]+\]]],
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
; GCN: buffer_store_dword [[K]], [[PTR]]
-define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) #0 {
+define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) nounwind {
%ptr = load ptr addrspace(1), ptr addrspace(1) %in, !invariant.load !0
%ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1
store i16 123, ptr addrspace(1) %ptr, align 4
@@ -22,7 +22,7 @@ define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointe
; GCN: s_load_dwordx2 s[[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
; GCN: buffer_store_dword [[K]], off, s[[[SPTR_LO]]:
-define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(ptr addrspace(4) dereferenceable(4096) nonnull %in) #0 {
+define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_pointer_load(ptr addrspace(4) dereferenceable(4096) nonnull %in) nounwind {
%ptr = load ptr addrspace(1), ptr addrspace(4) %in, !invariant.load !0
%ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1
store i16 123, ptr addrspace(1) %ptr, align 4
@@ -31,5 +31,3 @@ define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_poin
}
!0 = !{}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
index fa3bf72b1a97f9..22a5c8d69bb2d8 100644
--- a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
@@ -1,7 +1,7 @@
# RUN: llc -run-pass block-placement -mtriple=amdgcn -verify-machineinstrs -o - %s | FileCheck %s
--- |
- define amdgpu_kernel void @invert_br_undef_vcc(float %cond, ptr addrspace(1) %out) #0 {
+ define amdgpu_kernel void @invert_br_undef_vcc(float %cond, ptr addrspace(1) %out) nounwind {
entry:
br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
@@ -19,8 +19,6 @@
ret void
}
- attributes #0 = { nounwind }
-
!0 = !{}
...
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
index 492ad9561875c8..7143acd233227d 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
@@ -3,37 +3,37 @@
; Make sure the expected regmask is generated for sub/superregisters.
; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 {{$}}
-define void @csr() #0 {
- call void asm sideeffect "", "~{v0},~{v44},~{v45}"() #0
+define void @csr() nounwind {
+ call void asm sideeffect "", "~{v0},~{v44},~{v45}"() nounwind
ret void
}
; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 {{$}}
-define void @subregs_for_super() #0 {
- call void asm sideeffect "", "~{v0},~{v1}"() #0
+define void @subregs_for_super() nounwind {
+ call void asm sideeffect "", "~{v0},~{v1}"() nounwind
ret void
}
; CHECK-DAG: clobbered_reg_with_sub Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 {{$}}
-define void @clobbered_reg_with_sub() #0 {
- call void asm sideeffect "", "~{v[0:1]}"() #0
+define void @clobbered_reg_with_sub() nounwind {
+ call void asm sideeffect "", "~{v[0:1]}"() nounwind
ret void
}
; CHECK-DAG: nothing Clobbered Registers: {{$}}
-define void @nothing() #0 {
+define void @nothing() nounwind {
ret void
}
; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 $m0_hi16 $m0_lo16 {{$}}
-define void @special_regs() #0 {
- call void asm sideeffect "", "~{m0},~{scc}"() #0
+define void @special_regs() nounwind {
+ call void asm sideeffect "", "~{m0},~{scc}"() nounwind
ret void
}
; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo $vcc_hi_hi16 $vcc_hi_lo16 $vcc_lo_hi16 $vcc_lo_lo16 {{$}}
-define void @vcc() #0 {
- call void asm sideeffect "", "~{vcc}"() #0
+define void @vcc() nounwind {
+ call void asm sideeffect "", "~{vcc}"() nounwind
ret void
}
@@ -43,7 +43,5 @@ define void @vcc() #0 {
ptr @nothing,
ptr @special_regs,
ptr @vcc]
-
-attributes #0 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
index 29a96c227f2f0a..541d7a451119d1 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
@@ -14,19 +14,19 @@
%struct.MicrofacetExtra = type { <3 x float>, <3 x float>, <3 x float>, float, [12 x i8] }
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fmuladd.f32(float, float, float) #0
+declare float @llvm.fmuladd.f32(float, float, float) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #0
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
+declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) argmemonly nofree nosync nounwind willreturn
; Function Attrs: norecurse
-define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 {
+define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) norecurse {
; GCN-LABEL: {{^}}svm_node_closure_bsdf:
; GCN-NOT: v_writelane_b32
; GCN: s_movk_i32 s26, 0x60
@@ -199,15 +199,10 @@ kernel_set_buffer_pointers.exit:
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fabs.f32(float) #0
+declare float @llvm.fabs.f32(float) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.maxnum.f32(float, float) #0
+declare float @llvm.maxnum.f32(float, float) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #3
-
-attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
-attributes #1 = { argmemonly nofree nosync nounwind willreturn }
-attributes #2 = { norecurse }
-attributes #3 = { nounwind readnone speculatable willreturn }
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll
index 6c8646968b6762..27860ea39e0367 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra.ll
@@ -4,7 +4,7 @@
; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
-define amdgpu_kernel void @kernel(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @kernel(ptr addrspace(1) %out) nounwind {
entry:
store i32 0, ptr addrspace(1) %out
ret void
@@ -12,8 +12,8 @@ entry:
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
-define hidden void @func() #1 {
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
+define hidden void @func() nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() nounwind
ret void
}
@@ -32,7 +32,7 @@ define hidden void @func() #1 {
; GCN: ; NumSgprs: 37
; GCN: ; NumVgprs: 9
-define amdgpu_kernel void @kernel_call() #0 {
+define amdgpu_kernel void @kernel_call() nounwind {
%vgpr = load volatile i32, ptr addrspace(1) undef
tail call void @func()
store volatile i32 %vgpr, ptr addrspace(1) undef
@@ -50,7 +50,7 @@ define amdgpu_kernel void @kernel_call() #0 {
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 10
-define void @func_regular_call() #1 {
+define void @func_regular_call() nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
%vgpr = load volatile i32, ptr addrspace(1) undef
tail call void @func()
store volatile i32 %vgpr, ptr addrspace(1) undef
@@ -66,7 +66,7 @@ define void @func_regular_call() #1 {
; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 8
-define void @func_tail_call() #1 {
+define void @func_tail_call() nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
tail call void @func()
ret void
}
@@ -79,7 +79,7 @@ define void @func_tail_call() #1 {
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 10
-define void @func_call_tail_call() #1 {
+define void @func_call_tail_call() nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
%vgpr = load volatile i32, ptr addrspace(1) undef
tail call void @func()
store volatile i32 %vgpr, ptr addrspace(1) undef
@@ -99,7 +99,7 @@ define void @void_func_void() noinline {
; GCN: s_swappc_b64
; GCN-NOT: s32
; GCN: s_swappc_b64
-define void @test_funcx2() #0 {
+define void @test_funcx2() nounwind {
call void @void_func_void()
call void @void_func_void()
ret void
@@ -108,13 +108,13 @@ define void @test_funcx2() #0 {
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
- call void @hoge() #0
+ call void @hoge() nounwind
ret void
}
; Make sure we save/restore the return address around the call.
; Function Attrs: norecurse
-define internal void @hoge() #2 {
+define internal void @hoge() norecurse {
bb:
; GCN-LABEL: {{^}}hoge:
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
@@ -129,8 +129,3 @@ bb:
}
declare dso_local void @eggs()
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-attributes #2 = { norecurse }
diff --git a/llvm/test/CodeGen/AMDGPU/kcache-fold.ll b/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
index c3b41af529ecd1..2afcf77675b8db 100644
--- a/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
@@ -2,7 +2,7 @@
; CHECK: {{^}}main1:
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
-define amdgpu_kernel void @main1() #0 {
+define amdgpu_kernel void @main1() nounwind {
main_body:
%tmp = load <4 x float>, ptr addrspace(8) null
%tmp7 = extractelement <4 x float> %tmp, i32 0
@@ -54,7 +54,7 @@ main_body:
; CHECK: {{^}}main2:
; CHECK-NOT: MOV
-define amdgpu_kernel void @main2() #0 {
+define amdgpu_kernel void @main2() nounwind {
main_body:
%tmp = load <4 x float>, ptr addrspace(8) null
%tmp7 = extractelement <4 x float> %tmp, i32 0
@@ -104,9 +104,6 @@ main_body:
ret void
}
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) nounwind
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
index 7a913cf50ea2bb..4721957706f2e6 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
+++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
@@ -5,11 +5,9 @@
# when the frame pointer is enabled. That limitation is now removed and this test should compile without any crash.
--- |
- define amdgpu_kernel void @kernel_vgpr32_spill() #0 {
+ define amdgpu_kernel void @kernel_vgpr32_spill() "frame-pointer"="all" {
ret void
}
-
- attributes #0 = { "frame-pointer"="all"}
...
---
name: kernel_vgpr32_spill
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index 1acbb091182802..581fd67ab4b186 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -4,7 +4,7 @@
; The forced spill to preserve the scratch VGPR require the voffset to hold the large offset
; value in the MUBUF instruction being emitted before s_cbranch_scc1 as it clobbers the SCC.
-define amdgpu_kernel void @test_kernel(i32 %val) #0 {
+define amdgpu_kernel void @test_kernel(i32 %val) nounwind "frame-pointer"="all" {
; CHECK-LABEL: test_kernel:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s32, 0x180000
@@ -113,7 +113,5 @@ end:
declare void @device_func(ptr addrspace(5))
-attributes #0 = { nounwind "frame-pointer"="all" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
index 3e0ad65c498213..692b0001a11ba2 100644
--- a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
@@ -10,7 +10,7 @@
; tests.
; FIXME: Immediate value 0x41200000 should be folded into the v_cmp instruction.
-define amdgpu_ps void @return_void(float %0) #0 {
+define amdgpu_ps void @return_void(float %0) nounwind {
; CHECK-LABEL: return_void:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
@@ -54,7 +54,7 @@ end:
ret void
}
-define amdgpu_ps void @return_void_compr(float %0) #0 {
+define amdgpu_ps void @return_void_compr(float %0) nounwind {
; CHECK-LABEL: return_void_compr:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
@@ -98,7 +98,7 @@ end:
}
; test the case where there's only a kill in an infinite loop
-define amdgpu_ps void @only_kill() #0 {
+define amdgpu_ps void @only_kill() nounwind {
; CHECK-LABEL: only_kill:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
@@ -126,7 +126,7 @@ loop:
}
; Check that the epilog is the final block
-define amdgpu_ps float @return_nonvoid(float %0) #0 {
+define amdgpu_ps float @return_nonvoid(float %0) nounwind {
; CHECK-LABEL: return_nonvoid:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
@@ -165,8 +165,6 @@ end:
ret float 0.
}
-declare void @llvm.amdgcn.kill(i1) #0
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
-declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.kill(i1) nounwind
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind
+declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
index 34aecd77c2506f..6b5c1e631e6828 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
@@ -22,7 +22,7 @@ define half @known_nnan_extract_vector_elt(float %a, float %b, i32 %idx, half %c
}
; should not emit v_max
-define float @fma_not_fmaxnm_maybe_nan(i32 %i1, i32 %i2, i32 %i3) #0 {
+define float @fma_not_fmaxnm_maybe_nan(i32 %i1, i32 %i2, i32 %i3) nounwind "no-signed-zeros-fp-math"="true" "target-features"="+mad-mac-f32-insts" {
; CHECK-LABEL: fma_not_fmaxnm_maybe_nan:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -46,5 +46,3 @@ declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float)
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
declare half @llvm.canonicalize.f16(half)
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" "target-features"="+mad-mac-f32-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
index 64948c374e4ddc..1937caed36c650 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -4,7 +4,7 @@
; Mostly overlaps with fmed3.ll to stress specific cases of
; isKnownNeverSNaN.
-define float @v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -18,7 +18,7 @@ define float @v_test_known_not_snan_fabs_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -32,7 +32,7 @@ define float @v_test_known_not_snan_fneg_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32(half %a) #0 {
+define float @v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@ define float @v_test_known_not_snan_fpext_input_fmed3_r_i_i_f32(half %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32(double %a) #0 {
+define float @v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32(double %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,7 +62,7 @@ define float @v_test_known_not_snan_fptrunc_input_fmed3_r_i_i_f32(double %a) #0
ret float %med
}
-define float @v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32(float %a, float %sign) #0 {
+define float @v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32(float %a, float %sign) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,7 +79,7 @@ define float @v_test_known_not_snan_copysign_input_fmed3_r_i_i_f32(float %a, flo
}
; Canonicalize always quiets, so nothing is necessary.
-define float @v_test_known_canonicalize_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_canonicalize_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_canonicalize_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -92,7 +92,7 @@ define float @v_test_known_canonicalize_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -109,7 +109,7 @@ define float @v_test_known_not_snan_minnum_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-define float @v_test_known_not_minnum_maybe_nan_src0_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_minnum_maybe_nan_src0_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_minnum_maybe_nan_src0_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@ define float @v_test_known_not_minnum_maybe_nan_src0_input_fmed3_r_i_i_f32(float
ret float %med
}
-define float @v_test_known_not_minnum_maybe_nan_src1_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_minnum_maybe_nan_src1_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_minnum_maybe_nan_src1_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -141,7 +141,7 @@ define float @v_test_known_not_minnum_maybe_nan_src1_input_fmed3_r_i_i_f32(float
ret float %med
}
-define float @v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -157,7 +157,7 @@ define float @v_minnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -173,7 +173,7 @@ define float @v_minnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -190,7 +190,7 @@ define float @v_test_known_not_snan_maxnum_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-define float @v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -206,7 +206,7 @@ define float @v_maxnum_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -222,7 +222,7 @@ define float @v_maxnum_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_test_known_not_snan_select_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
+define float @v_test_known_not_snan_select_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_select_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -241,7 +241,7 @@ define float @v_test_known_not_snan_select_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
+define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_select_possible_nan_lhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -259,7 +259,7 @@ define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_select_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) #0 {
+define float @v_select_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b, i32 %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_select_possible_nan_rhs_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -277,7 +277,7 @@ define float @v_select_possible_nan_rhs_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -290,7 +290,7 @@ define float @v_test_known_not_snan_fadd_input_fmed3_r_i_i_f32(float %a, float %
ret float %med
}
-define float @v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -303,7 +303,7 @@ define float @v_test_known_not_snan_fsub_input_fmed3_r_i_i_f32(float %a, float %
ret float %med
}
-define float @v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -316,7 +316,7 @@ define float @v_test_known_not_snan_fmul_input_fmed3_r_i_i_f32(float %a, float %
ret float %med
}
-define float @v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0 {
+define float @v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32(i32 %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,7 +329,7 @@ define float @v_test_known_not_snan_uint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0
ret float %med
}
-define float @v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0 {
+define float @v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32(i32 %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -342,7 +342,7 @@ define float @v_test_known_not_snan_sint_to_fp_input_fmed3_r_i_i_f32(i32 %a) #0
ret float %med
}
-define float @v_test_known_not_snan_fma_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
+define float @v_test_known_not_snan_fma_input_fmed3_r_i_i_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fma_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -355,7 +355,7 @@ define float @v_test_known_not_snan_fma_input_fmed3_r_i_i_f32(float %a, float %b
ret float %med
}
-define float @v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
+define float @v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -369,7 +369,7 @@ define float @v_test_known_not_snan_fmad_input_fmed3_r_i_i_f32(float %a, float %
}
-define float @v_test_known_not_snan_sin_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_sin_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_sin_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -384,7 +384,7 @@ define float @v_test_known_not_snan_sin_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_cos_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_cos_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_cos_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -399,7 +399,7 @@ define float @v_test_known_not_snan_cos_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -412,7 +412,7 @@ define float @v_test_known_not_snan_exp2_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -425,7 +425,7 @@ define float @v_test_known_not_snan_trunc_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_floor_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_floor_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_floor_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -438,7 +438,7 @@ define float @v_test_known_not_snan_floor_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -451,7 +451,7 @@ define float @v_test_known_not_snan_ceil_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_round_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_round_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_round_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -470,7 +470,7 @@ define float @v_test_known_not_snan_round_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_rint_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_rint_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_rint_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -483,7 +483,7 @@ define float @v_test_known_not_snan_rint_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -496,7 +496,7 @@ define float @v_test_known_not_snan_nearbyint_input_fmed3_r_i_i_f32(float %a) #0
ret float %med
}
-define float @v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32(float %a, float %b) #0 {
+define float @v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -509,7 +509,7 @@ define float @v_test_known_not_snan_fmul_legacy_input_fmed3_r_i_i_f32(float %a,
ret float %med
}
-define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b) #0 {
+define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -522,7 +522,7 @@ define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b
ret float %med
}
-define float @v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
+define float @v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -536,7 +536,7 @@ define float @v_test_known_not_snan_fmed3_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-define float @v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
+define float @v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -550,7 +550,7 @@ define float @v_test_known_not_snan_fmin3_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-define float @v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32(i8 %char) #0 {
+define float @v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32(i8 %char) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -563,7 +563,7 @@ define float @v_test_known_not_snan_cvt_ubyte0_input_fmed3_r_i_i_f32(i8 %char) #
ret float %med
}
-define float @v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
+define float @v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32(float %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -576,7 +576,7 @@ define float @v_test_not_known_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
ret float %med
}
-define float @v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
+define float @v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32(float %arg) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -591,7 +591,7 @@ define float @v_test_known_not_frexp_mant_input_fmed3_r_i_i_f32(float %arg) #0 {
ret float %med
}
-define float @v_test_known_not_snan_rcp_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_rcp_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_rcp_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -603,7 +603,7 @@ define float @v_test_known_not_snan_rcp_input_fmed3_r_i_i_f32(float %a) #0 {
%med = call float @llvm.minnum.f32(float %max, float 4.0)
ret float %med
}
-define float @v_test_known_not_snan_rsq_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_rsq_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_rsq_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -616,7 +616,7 @@ define float @v_test_known_not_snan_rsq_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_fract_input_fmed3_r_i_i_f32(float %a) #0 {
+define float @v_test_known_not_snan_fract_input_fmed3_r_i_i_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_fract_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -629,7 +629,7 @@ define float @v_test_known_not_snan_fract_input_fmed3_r_i_i_f32(float %a) #0 {
ret float %med
}
-define float @v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32(float %a, float %b, float %c) #0 {
+define float @v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32(float %a, float %b, float %c) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GCN-LABEL: v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -642,32 +642,29 @@ define float @v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32(float %a, float
ret float %med
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.cos.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.ceil.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.copysign.f32(float, float) #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.ldexp.f32.i32(float, i32) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare float @llvm.amdgcn.frexp.mant.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.amdgcn.fract.f32(float) #1
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone speculatable }
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable
+declare float @llvm.sin.f32(float) nounwind readnone speculatable
+declare float @llvm.cos.f32(float) nounwind readnone speculatable
+declare float @llvm.exp2.f32(float) nounwind readnone speculatable
+declare float @llvm.trunc.f32(float) nounwind readnone speculatable
+declare float @llvm.floor.f32(float) nounwind readnone speculatable
+declare float @llvm.ceil.f32(float) nounwind readnone speculatable
+declare float @llvm.round.f32(float) nounwind readnone speculatable
+declare float @llvm.rint.f32(float) nounwind readnone speculatable
+declare float @llvm.nearbyint.f32(float) nounwind readnone speculatable
+declare float @llvm.canonicalize.f32(float) nounwind readnone speculatable
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.maxnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.copysign.f32(float, float) nounwind readnone speculatable
+declare float @llvm.fma.f32(float, float, float) nounwind readnone speculatable
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+declare float @llvm.ldexp.f32.i32(float, i32) nounwind readnone speculatable
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone speculatable
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone speculatable
+declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone speculatable
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone speculatable
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone speculatable
+declare float @llvm.amdgcn.fract.f32(float) nounwind readnone speculatable
+declare float @llvm.amdgcn.cubeid(float, float, float) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign"
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
index cb6073e9341e04..1891ba1cf2a080 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -57,7 +57,7 @@
; Scratch size = alloca size + emergency stack slot, align {{.*}}, addrspace(5)
; ALL: ; ScratchSize: 32772
-define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) nounwind {
%large = alloca [8192 x i32], align 4, addrspace(5)
%gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191
store volatile i32 %x, ptr addrspace(5) %gep
@@ -67,7 +67,5 @@ define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
ret void
}
-attributes #0 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
index 6531edeaec5246..7883ffb13c5e34 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -18,7 +18,7 @@
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[4:7], 0 offen
; ALL: ; ScratchSize: 32772
-define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
+define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) nounwind {
%large = alloca [8192 x i32], align 4, addrspace(5)
%gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191
store volatile i32 %x, ptr addrspace(5) %gep
@@ -44,7 +44,7 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[4:7], 0 offen
; ALL: ; ScratchSize: 32772
-define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
+define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) nounwind {
%large = alloca [8192 x i32], align 4, addrspace(5)
%gep = getelementptr [8192 x i32], ptr addrspace(5) %large, i32 0, i32 8191
store volatile i32 %x, ptr addrspace(5) %gep
@@ -53,5 +53,3 @@ define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %
store volatile i32 %val, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index c18d9941bfd53b..c72796c3788d2a 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -6,7 +6,7 @@
; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] poison, align 4
; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] poison, align 4
-define amdgpu_kernel void @promote_alloca_size_63(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @promote_alloca_size_63(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-flat-work-group-size"="63,63" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -27,7 +27,7 @@ entry:
; ALL: @promote_alloca_size_256.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4
-define amdgpu_kernel void @promote_alloca_size_256(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #1 {
+define amdgpu_kernel void @promote_alloca_size_256(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -50,7 +50,7 @@ entry:
; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] poison, align 4
; GFX10PLUS: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] poison, align 4
-define amdgpu_kernel void @promote_alloca_size_1600(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #2 {
+define amdgpu_kernel void @promote_alloca_size_1600(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="1024,1024" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -72,7 +72,7 @@ entry:
; ALL-LABEL: @occupancy_0(
; CI-NOT: alloca [5 x i32]
; SI: alloca [5 x i32]
-define amdgpu_kernel void @occupancy_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #3 {
+define amdgpu_kernel void @occupancy_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,10" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -94,7 +94,7 @@ entry:
; ALL-LABEL: @occupancy_max(
; CI-NOT: alloca [5 x i32]
; SI: alloca [5 x i32]
-define amdgpu_kernel void @occupancy_max(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #4 {
+define amdgpu_kernel void @occupancy_max(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,10" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -117,7 +117,7 @@ entry:
; CI-LABEL: @occupancy_6(
; SI: alloca
; CI-NOT: alloca
-define amdgpu_kernel void @occupancy_6(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #5 {
+define amdgpu_kernel void @occupancy_6(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [42 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -142,7 +142,7 @@ entry:
; SICI: alloca [43 x i8]
; GFX10PLUS-NOT: alloca
-define amdgpu_kernel void @occupancy_6_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #5 {
+define amdgpu_kernel void @occupancy_6_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [43 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -167,7 +167,7 @@ entry:
; CI-LABEL: @occupancy_8(
; SI: alloca
; CI-NOT: alloca
-define amdgpu_kernel void @occupancy_8(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #6 {
+define amdgpu_kernel void @occupancy_8(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,8" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [32 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -192,7 +192,7 @@ entry:
; SICI: alloca [33 x i8]
; GFX10PLUS-NOT: alloca
-define amdgpu_kernel void @occupancy_8_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #6 {
+define amdgpu_kernel void @occupancy_8_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,8" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [33 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -217,7 +217,7 @@ entry:
; CI-LABEL: @occupancy_9(
; SI: alloca
; CI-NOT: alloca
-define amdgpu_kernel void @occupancy_9(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #7 {
+define amdgpu_kernel void @occupancy_9(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [28 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -242,7 +242,7 @@ entry:
; SICI: alloca [29 x i8]
; GFX10PLUS-NOT: alloca
-define amdgpu_kernel void @occupancy_9_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #7 {
+define amdgpu_kernel void @occupancy_9_over(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="64,64" {
entry:
%stack = alloca [29 x i8], align 4, addrspace(5)
%tmp = load i8, ptr addrspace(1) %in, align 1
@@ -262,12 +262,3 @@ entry:
store i8 %tmp3, ptr addrspace(1) %arrayidx13, align 1
ret void
}
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
-attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="1024,1024" }
-attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
-attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" }
-attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }
-attributes #6 = { nounwind "amdgpu-waves-per-eu"="1,8" "amdgpu-flat-work-group-size"="64,64" }
-attributes #7 = { nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="64,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
index 8c23ace9b014bf..3b9bddc7821a29 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
@@ -9,13 +9,13 @@
@lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef
@lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #0
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #0
+declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) argmemonly nounwind
; HSA-LABEL: {{^}}test_no_round_size_1:
; HSA: .amdhsa_group_segment_fixed_size 38
-define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false)
ret void
@@ -32,7 +32,7 @@ define amdgpu_kernel void @test_no_round_size_1(ptr addrspace(1) %out, ptr addrs
; HSA-LABEL: {{^}}test_round_size_2:
; HSA: .amdhsa_group_segment_fixed_size 86
-define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false)
@@ -45,7 +45,7 @@ define amdgpu_kernel void @test_round_size_2(ptr addrspace(1) %out, ptr addrspac
; 38 + (10 pad) + 38 (= 86)
; HSA-LABEL: {{^}}test_round_size_2_align_8:
; HSA: .amdhsa_group_segment_fixed_size 86
-define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@@ -57,7 +57,7 @@ define amdgpu_kernel void @test_round_size_2_align_8(ptr addrspace(1) %out, ptr
; HSA-LABEL: {{^}}test_round_local_lds_and_arg:
; HSA: .amdhsa_group_segment_fixed_size 38
-define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 {
+define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.align16.0, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.align16.0, i32 38, i1 false)
@@ -68,7 +68,7 @@ define amdgpu_kernel void @test_round_local_lds_and_arg(ptr addrspace(1) %out, p
; HSA-LABEL: {{^}}test_round_lds_arg:
; HSA: .amdhsa_group_segment_fixed_size 0
-define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) #1 {
+define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) %lds.arg) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 %lds.arg, ptr addrspace(1) align 4 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 %lds.arg, i32 38, i1 false)
ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @test_round_lds_arg(ptr addrspace(1) %out, ptr addrspa
; FIXME: Parameter alignment not considered
; HSA-LABEL: {{^}}test_high_align_lds_arg:
; HSA: .amdhsa_group_segment_fixed_size 0
-define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) align 64 %lds.arg) #1 {
+define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(3) align 64 %lds.arg) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 64 %lds.arg, ptr addrspace(1) align 64 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 64 %out, ptr addrspace(3) align 64 %lds.arg, i32 38, i1 false)
ret void
@@ -86,7 +86,7 @@ define amdgpu_kernel void @test_high_align_lds_arg(ptr addrspace(1) %out, ptr ad
; (39 * 4) + (4 pad) + (7 * 8) = 216
; HSA-LABEL: {{^}}test_missing_alignment_size_2_order0:
; HSA: .amdhsa_group_segment_fixed_size 216
-define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 @lds.missing.align.0, ptr addrspace(1) align 4 %in, i32 160, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 @lds.missing.align.0, i32 160, i1 false)
@@ -99,7 +99,7 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order0(ptr addrspace(1)
; (39 * 4) + (4 pad) + (7 * 8) = 216
; HSA-LABEL: {{^}}test_missing_alignment_size_2_order1:
; HSA: .amdhsa_group_segment_fixed_size 216
-define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.missing.align.1, ptr addrspace(1) align 8 %in, i32 56, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.missing.align.1, i32 56, i1 false)
@@ -113,7 +113,7 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order1(ptr addrspace(1)
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order0:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false)
@@ -130,7 +130,7 @@ define amdgpu_kernel void @test_round_size_3_order0(ptr addrspace(1) %out, ptr a
; 38 (+ 10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order1:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align32.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align32.0, i32 38, i1 false)
@@ -147,7 +147,7 @@ define amdgpu_kernel void @test_round_size_3_order1(ptr addrspace(1) %out, ptr a
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 126)
; HSA-LABEL: {{^}}test_round_size_3_order2:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@@ -164,7 +164,7 @@ define amdgpu_kernel void @test_round_size_3_order2(ptr addrspace(1) %out, ptr a
; 38 + (10 pad) + 38 + (10 pad) + 38 ( = 134)
; HSA-LABEL: {{^}}test_round_size_3_order3:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align16.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align16.0, i32 38, i1 false)
@@ -181,7 +181,7 @@ define amdgpu_kernel void @test_round_size_3_order3(ptr addrspace(1) %out, ptr a
; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
; HSA-LABEL: {{^}}test_round_size_3_order4:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false)
@@ -198,7 +198,7 @@ define amdgpu_kernel void @test_round_size_3_order4(ptr addrspace(1) %out, ptr a
; 38 + (10 pad) + 38 + (10 pad) + 38 (= 134)
; HSA-LABEL: {{^}}test_round_size_3_order5:
; HSA: .amdhsa_group_segment_fixed_size 134
-define amdgpu_kernel void @test_round_size_3_order5(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_round_size_3_order5(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 8 @lds.align8.0, ptr addrspace(1) align 8 %in, i32 38, i1 false)
call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 8 %out, ptr addrspace(3) align 8 @lds.align8.0, i32 38, i1 false)
@@ -211,9 +211,5 @@ define amdgpu_kernel void @test_round_size_3_order5(ptr addrspace(1) %out, ptr a
ret void
}
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind }
-attributes #2 = { convergent nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-bounds.ll b/llvm/test/CodeGen/AMDGPU/lds-bounds.ll
index 942b4f7cf4cbce..28d24b4554f070 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-bounds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-bounds.ll
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}store_aligned:
; GCN: ds_write_b64
-define amdgpu_cs void @store_aligned(ptr addrspace(3) %ptr) #0 {
+define amdgpu_cs void @store_aligned(ptr addrspace(3) %ptr) nounwind {
entry:
%ptr.gep.1 = getelementptr i32, ptr addrspace(3) %ptr, i32 1
@@ -17,7 +17,7 @@ entry:
; GCN-LABEL: {{^}}load_aligned:
; GCN: ds_read_b64
-define amdgpu_cs <2 x float> @load_aligned(ptr addrspace(3) %ptr) #0 {
+define amdgpu_cs <2 x float> @load_aligned(ptr addrspace(3) %ptr) nounwind {
entry:
%ptr.gep.1 = getelementptr i32, ptr addrspace(3) %ptr, i32 1
@@ -33,7 +33,7 @@ entry:
; GCN-LABEL: {{^}}store_global_const_idx:
; GCN: ds_write2_b32
-define amdgpu_cs void @store_global_const_idx() #0 {
+define amdgpu_cs void @store_global_const_idx() nounwind {
entry:
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 3
%ptr.b = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 4
@@ -46,7 +46,7 @@ entry:
; GCN-LABEL: {{^}}load_global_const_idx:
; GCN: ds_read2_b32
-define amdgpu_cs <2 x float> @load_global_const_idx() #0 {
+define amdgpu_cs <2 x float> @load_global_const_idx() nounwind {
entry:
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 3
%ptr.b = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 4
@@ -65,7 +65,7 @@ entry:
; SI: ds_write_b32
; SI: ds_write_b32
; NOSI: ds_write2_b32
-define amdgpu_cs void @store_global_var_idx_case1(i32 %idx) #0 {
+define amdgpu_cs void @store_global_var_idx_case1(i32 %idx) nounwind {
entry:
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 %idx
%ptr.b = getelementptr i32, ptr addrspace(3) %ptr.a, i32 1
@@ -80,7 +80,7 @@ entry:
; SI: ds_read_b32
; SI: ds_read_b32
; NOSI: ds_read2_b32
-define amdgpu_cs <2 x float> @load_global_var_idx_case1(i32 %idx) #0 {
+define amdgpu_cs <2 x float> @load_global_var_idx_case1(i32 %idx) nounwind {
entry:
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 %idx
%ptr.b = getelementptr i32, ptr addrspace(3) %ptr.a, i32 1
@@ -97,7 +97,7 @@ entry:
; GCN-LABEL: {{^}}store_global_var_idx_case2:
; GCN: ds_write2_b32
-define amdgpu_cs void @store_global_var_idx_case2(i32 %idx) #0 {
+define amdgpu_cs void @store_global_var_idx_case2(i32 %idx) nounwind {
entry:
%idx.and = and i32 %idx, 255
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 %idx.and
@@ -111,7 +111,7 @@ entry:
; GCN-LABEL: {{^}}load_global_var_idx_case2:
; GCN: ds_read2_b32
-define amdgpu_cs <2 x float> @load_global_var_idx_case2(i32 %idx) #0 {
+define amdgpu_cs <2 x float> @load_global_var_idx_case2(i32 %idx) nounwind {
entry:
%idx.and = and i32 %idx, 255
%ptr.a = getelementptr [512 x i32], ptr addrspace(3) @compute_lds, i32 0, i32 %idx.and
@@ -125,5 +125,3 @@ entry:
%bc = bitcast <2 x i32> %r.1 to <2 x float>
ret <2 x float> %bc
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
index 9619cb73b1538e..3d545f5903b66f 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
@@ -13,7 +13,7 @@
@module_variable = addrspace(3) global i16 undef
; Variables are allocated into module scope block when used by a non-kernel function
-define void @use_module() #0 {
+define void @use_module() noinline {
; CHECK-LABEL: use_module:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -35,7 +35,7 @@ define void @use_module() #0 {
; External LDS does not influence the frame when called indirectly either
-define void @use_extern_normal() #0 {
+define void @use_extern_normal() noinline {
; CHECK-LABEL: use_extern_normal:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,7 +59,7 @@ define void @use_extern_normal() #0 {
ret void
}
-define void @use_extern_overalign() #0 {
+define void @use_extern_overalign() noinline {
; CHECK-LABEL: use_extern_overalign:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -609,8 +609,5 @@ define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i
ret void
}
-
-attributes #0 = { noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll b/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
index 273a0bd978c603..af19bf07731c8f 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
@@ -18,7 +18,7 @@
; GCN: .LBB0_3:
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @copy_local_to_global_loop_m0_init(ptr addrspace(1) noalias nocapture %out, ptr addrspace(3) noalias nocapture readonly %in, i32 %n) #0 {
+define amdgpu_kernel void @copy_local_to_global_loop_m0_init(ptr addrspace(1) noalias nocapture %out, ptr addrspace(3) noalias nocapture readonly %in, i32 %n) nounwind {
bb:
%tmp = icmp sgt i32 %n, 0
br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
@@ -45,5 +45,3 @@ bb:
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
index 60dbd1c082a245..b13901dfb1fcef 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
@@ -42,7 +42,7 @@
; GCN: .amdgpu_lds lds.external, 0, 4
; GCN: .globl lds.defined
; GCN: .amdgpu_lds lds.defined, 32, 8
-define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) #0 {
+define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) "no-signed-zeros-fp-math"="true" {
main_body:
%gep0 = getelementptr [0 x i32], ptr addrspace(3) @lds.external, i32 0, i32 %arg1
%tmp = load i32, ptr addrspace(3) %gep0
@@ -55,7 +55,4 @@ main_body:
}
; Function Attrs: convergent nounwind readnone
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32) #4
-
-attributes #0 = { "no-signed-zeros-fp-math"="true" }
-attributes #4 = { convergent nounwind readnone }
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32) convergent nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
index d23dee1f02f09c..a79dbbfb7e8554 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
@@ -275,9 +275,7 @@ define <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_s
ret <4 x half> %val
}
-declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
index bdcb77201714ab..9d20f87a0fdbc0 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
@@ -344,9 +344,7 @@ define <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_
ret <4 x float> %val
}
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
index 5b8bd1f60233b8..0b44b2745a0682 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
@@ -325,9 +325,7 @@ define <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sg
ret <4 x half> %val
}
-declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
index 7dabd9a3957468..24aef0abb310d4 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
@@ -404,9 +404,7 @@ define <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffset__s
ret <4 x float> %val
}
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
index 24dc4f1b3c0aa9..1f53ec33b47299 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
@@ -508,9 +508,7 @@ define half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(ptr
ret half %val
}
-declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
index 01dc0328f2d2d9..d2c417b3f5311f 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
@@ -583,9 +583,7 @@ define float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(pt
ret float %val
}
-declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
index bcffca8a3c4fca..76157320ebaab3 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
@@ -428,9 +428,7 @@ define half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4
ret half %val
}
-declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
index 51e56a47fc2f25..3abcb8b775dad7 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
@@ -493,9 +493,7 @@ define float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4
ret float %val
}
-declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/literals.ll b/llvm/test/CodeGen/AMDGPU/literals.ll
index 0e6400e83d4863..6d62719ff061c0 100644
--- a/llvm/test/CodeGen/AMDGPU/literals.ll
+++ b/llvm/test/CodeGen/AMDGPU/literals.ll
@@ -59,6 +59,4 @@ entry:
ret void
}
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-
-attributes #1 = { readnone }
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
index 8b16fef915a79d..b67b4c02c8293a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
@@ -1,14 +1,11 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_alignbyte_b32:
; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
-define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 %src2, i32 %src3) #1 {
- %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0
+define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 %src2, i32 %src3) nounwind {
+ %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
index 9b63a8a3efcf92..03c15782ce69e1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
-define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -19,7 +19,7 @@ main_body:
ret float %r
}
-define void @raw_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define void @raw_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -36,7 +36,7 @@ main_body:
ret void
}
-define void @raw_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
+define void @raw_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return_forced:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -52,7 +52,7 @@ main_body:
ret void
}
-define float @raw_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define float @raw_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -71,7 +71,7 @@ main_body:
ret float %r
}
-define void @raw_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define void @raw_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -89,7 +89,7 @@ main_body:
ret void
}
-define void @raw_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
+define void @raw_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -106,7 +106,7 @@ main_body:
ret void
}
-define float @struct_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define float @struct_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -124,7 +124,7 @@ main_body:
ret float %r
}
-define void @struct_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define void @struct_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -141,7 +141,7 @@ main_body:
ret void
}
-define void @struct_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
+define void @struct_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return_forced:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -157,7 +157,7 @@ main_body:
ret void
}
-define float @struct_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define float @struct_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -176,7 +176,7 @@ main_body:
ret float %r
}
-define void @struct_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
+define void @struct_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -194,7 +194,7 @@ main_body:
ret void
}
-define void @struct_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
+define void @struct_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) nounwind "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12: ; %bb.0: ; %main_body
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -211,9 +211,6 @@ main_body:
ret void
}
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "target-features"="+atomic-csub-no-rtn-insts" }
+declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
index 2b0584d39a3be4..3ab6201b4c95bb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
@@ -17,7 +17,7 @@ main_body:
; GCN-LABEL: {{^}}buffer_atomic_csub_no_rtn:
; PREGFX12: buffer_atomic_csub v0, v1, s[0:3], 0 idxen
; GFX12PLUS: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null idxen
-define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
+define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) "target-features"="+atomic-csub-no-rtn-insts" {
main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
ret void
@@ -35,7 +35,7 @@ main_body:
; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_no_rtn:
; PREGFX12: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 slc
; GFX12PLUS: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null idxen offset:4 th:TH_ATOMIC_NT
-define amdgpu_ps void @buffer_atomic_csub_off4_slc_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
+define amdgpu_ps void @buffer_atomic_csub_off4_slc_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) "target-features"="+atomic-csub-no-rtn-insts" {
main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
ret void
@@ -53,7 +53,7 @@ main_body:
; GCN-LABEL: {{^}}global_atomic_csub_no_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1]
-define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
+define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) "target-features"="+atomic-csub-no-rtn-insts" {
main_body:
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
ret void
@@ -72,11 +72,9 @@ main_body:
; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] offset:4
-define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
+define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) "target-features"="+atomic-csub-no-rtn-insts" {
main_body:
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
ret void
}
-
-attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index 49e272bed9cefd..2603b478e8e719 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -2,15 +2,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) nounwind argmemonly
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) nounwind argmemonly
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) nounwind argmemonly
-declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) nounwind argmemonly
+declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) nounwind argmemonly
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) nounwind argmemonly
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
; CIVI-DAG: s_mov_b32 m0
@@ -18,7 +18,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
@@ -67,7 +67,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr
; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
ret void
@@ -79,7 +79,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
@@ -113,7 +113,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
@@ -127,7 +127,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
@@ -138,7 +138,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa
; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr %out
ret void
@@ -148,7 +148,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 {
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) nounwind {
%gep = getelementptr i32, ptr %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr %out
@@ -177,7 +177,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind {
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
@@ -191,7 +191,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
@@ -203,7 +203,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr %out
ret void
@@ -214,7 +214,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) nounwind {
%gep = getelementptr i64, ptr %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr %out
@@ -246,7 +246,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind {
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
@@ -261,7 +261,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
@@ -277,8 +277,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0
; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false)
@@ -294,7 +294,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
ret void
@@ -307,7 +307,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
-define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
@@ -346,7 +346,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr
; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
ret void
@@ -358,7 +358,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
@@ -394,7 +394,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
@@ -410,7 +410,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
@@ -426,8 +426,8 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa
; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
%val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false)
@@ -435,7 +435,3 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
store i64 %val0, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind argmemonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
index bd07dd137ac492..5b02b45f27d1f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -95,17 +95,14 @@ main_body:
; the feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 {
+define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" {
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget:
; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 {
+define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst" {
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
ret void
}
-
-attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}
-attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
index 2ed61352f45926..ccf24e67225d81 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -2,15 +2,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
-declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) nounwind argmemonly
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) nounwind argmemonly
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) nounwind argmemonly
-declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) nounwind argmemonly
+declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) nounwind argmemonly
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) nounwind argmemonly
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
; CIVI-DAG: s_mov_b32 m0
@@ -18,7 +18,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr add
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
@@ -66,7 +66,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
ret void
@@ -76,7 +76,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr addrspace(1) %out
@@ -106,7 +106,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
@@ -120,7 +120,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
@@ -133,8 +133,8 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspa
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false)
@@ -147,7 +147,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out,
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
ret void
@@ -157,7 +157,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr add
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
-define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
@@ -189,7 +189,7 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
ret void
@@ -201,7 +201,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr
; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr addrspace(1) %out
@@ -238,7 +238,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
@@ -254,7 +254,7 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
@@ -265,7 +265,7 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspa
; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) nounwind {
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr %out
ret void
@@ -275,7 +275,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #0 {
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) nounwind {
%gep = getelementptr i32, ptr %ptr, i32 4
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
store i32 %result, ptr %out
@@ -304,7 +304,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind {
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%out.gep = getelementptr i32, ptr %out, i32 %id
@@ -318,7 +318,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
%gep = getelementptr i32, ptr %gep.tid, i32 5
@@ -331,8 +331,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
%val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false)
@@ -345,7 +345,7 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out,
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) nounwind {
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr %out
ret void
@@ -356,7 +356,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #0 {
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) nounwind {
%gep = getelementptr i64, ptr %ptr, i32 4
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
store i64 %result, ptr %out
@@ -388,7 +388,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind {
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%out.gep = getelementptr i64, ptr %out, i32 %id
@@ -403,7 +403,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
%gep = getelementptr i64, ptr %gep.tid, i32 5
@@ -415,7 +415,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) #0 {
+define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) nounwind {
%result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
%result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
@@ -423,7 +423,3 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0,
store i32 %result1, ptr addrspace(1) %out1
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind argmemonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index eed648f167f398..cdaef406838043 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -184,26 +184,24 @@ main_body:
ret float %v.float
}
-declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.sub.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.smin.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.umin.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.smax.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.umax.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.and.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.or.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.xor.i64(i64, <4 x i32>, i32, i32, i1) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) nounwind
+declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.sub.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.smin.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.umin.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.smax.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.umax.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.and.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.or.i64(i64, <4 x i32>, i32, i32, i1) nounwind
+declare i64 @llvm.amdgcn.buffer.atomic.xor.i64(i64, <4 x i32>, i32, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
index 6851302fdcda34..e9c54f484e316c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
@@ -126,8 +126,6 @@ main_body:
ret <2 x float> %data
}
-declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
index a209dcfe3a7a0e..800d285c9c862b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
@@ -466,11 +466,9 @@ define amdgpu_ps float @no_fold_fi_reg_soffset(<4 x i32> inreg %rsrc, i32 inreg
ret float %ret.val
}
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare i8 @llvm.amdgcn.buffer.load.i8(<4 x i32>, i32, i32, i1, i1) #0
-declare i16 @llvm.amdgcn.buffer.load.i16(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare i8 @llvm.amdgcn.buffer.load.i8(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare i16 @llvm.amdgcn.buffer.load.i16(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
index 41e2b4d0e55125..8ac13e310d0be9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
@@ -95,10 +95,7 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.buffer.store.format.f32(float, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.buffer.store.format.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
index 8b18848a62792c..df4c8a8dc3236c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
@@ -89,7 +89,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 idxen
-define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) #0 {
+define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) nounwind {
main_body:
call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
ret void
@@ -257,12 +257,9 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.i8(i8, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.i16(i16, <4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.i8(i8, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.i16(i16, <4 x i32>, i32, i32, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
index bd1888b6965a14..6e4e90056a2221 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
@@ -1,16 +1,14 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-declare void @llvm.amdgcn.buffer.wbinvl1() #0
+declare void @llvm.amdgcn.buffer.wbinvl1() nounwind
; GCN-LABEL: {{^}}test_buffer_wbinvl1:
; GCN-NEXT: ; %bb.0:
; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00]
; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00]
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_buffer_wbinvl1() #0 {
+define amdgpu_kernel void @test_buffer_wbinvl1() nounwind {
call void @llvm.amdgcn.buffer.wbinvl1()
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
index b937c42e14ed5a..0f4e21a83f31f5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
@@ -1,14 +1,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s
-declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0
+declare void @llvm.amdgcn.buffer.wbinvl1.sc() nounwind
; SI-LABEL: {{^}}test_buffer_wbinvl1_sc:
; SI-NEXT: ; %bb.0:
; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
; SI-NEXT: s_endpgm
-define amdgpu_kernel void @test_buffer_wbinvl1_sc() #0 {
+define amdgpu_kernel void @test_buffer_wbinvl1_sc() nounwind {
call void @llvm.amdgcn.buffer.wbinvl1.sc()
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
index 64ab8ecefd4909..f352b7960907e8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0
+declare void @llvm.amdgcn.buffer.wbinvl1.vol() nounwind
; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol:
; GCN-NEXT: ; %bb.0:
@@ -9,11 +9,9 @@ declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0
; VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
; GCN: _store_byte
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_buffer_wbinvl1_vol(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @test_buffer_wbinvl1_vol(ptr addrspace(1) %ptr) nounwind {
call void @llvm.amdgcn.buffer.wbinvl1.vol()
; This used to crash in hazard recognizer
store i8 0, ptr addrspace(1) %ptr, align 1
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
index 27fb4e5f965c93..464b04c0be8897 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
@@ -1,10 +1,10 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
-declare i1 @llvm.amdgcn.class.f32(float, i32) #1
-declare i1 @llvm.amdgcn.class.f64(double, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
+declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
+declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
; SI-LABEL: {{^}}test_class_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
@@ -14,8 +14,8 @@ declare double @llvm.fabs.f64(double) #1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
+define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -29,9 +29,9 @@ define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], floa
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
- %a.fabs = call float @llvm.fabs.f32(float %a) #1
- %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1
+define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) nounwind {
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -45,9 +45,9 @@ define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32],
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) nounwind {
%a.fneg = fsub float -0.0, %a
- %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32],
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
- %a.fabs = call float @llvm.fabs.f32(float %a) #1
+define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) nounwind {
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
%a.fneg.fabs = fsub float -0.0, %a.fabs
- %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -76,8 +76,8 @@ define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
+define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -89,8 +89,8 @@ define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) #0
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
+define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -104,8 +104,8 @@ define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) #0
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
+define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -118,8 +118,8 @@ define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
+define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -132,13 +132,13 @@ define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, s[{{[0-9]}}:{{[0-9]}}]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_test_class_full_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_test_class_full_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -150,13 +150,13 @@ define amdgpu_kernel void @v_test_class_full_mask_f32(ptr addrspace(1) %out, ptr
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%b = load i32, ptr addrspace(1) %gep.in
- %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -170,13 +170,13 @@ define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(ptr a
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%b = load i32, ptr addrspace(1) %gep.in
- %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -190,8 +190,8 @@ define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(ptr addrspac
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
+define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -205,9 +205,9 @@ define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], doub
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
- %a.fabs = call double @llvm.fabs.f64(double %a) #1
- %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1
+define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) nounwind {
+ %a.fabs = call double @llvm.fabs.f64(double %a) nounwind readnone
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -221,9 +221,9 @@ define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32],
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) nounwind {
%a.fneg = fsub double -0.0, %a
- %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -237,10 +237,10 @@ define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32],
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
- %a.fabs = call double @llvm.fabs.f64(double %a) #1
+define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) nounwind {
+ %a.fabs = call double @llvm.fabs.f64(double %a) nounwind readnone
%a.fneg.fabs = fsub double -0.0, %a.fabs
- %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -249,8 +249,8 @@ define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x
; SI-LABEL: {{^}}test_class_1_f64:
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
+define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -259,8 +259,8 @@ define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) #0
; SI-LABEL: {{^}}test_class_64_f64:
; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
+define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -275,8 +275,8 @@ define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) #
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x i32], double %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
+define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x i32], double %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -290,13 +290,13 @@ define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, s[{{[0-9]}}:{{[0-9]}}]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load double, ptr addrspace(1) %in
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -306,13 +306,13 @@ define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr
; XSI: v_cmp_class_f64_e32 vcc, 1.0,
; SI: v_cmp_class_f64_e32 vcc,
; SI: s_endpgm
-define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%b = load i32, ptr addrspace(1) %gep.in
- %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -321,13 +321,13 @@ define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(ptr a
; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; SI: s_endpgm
-define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%b = load i32, ptr addrspace(1) %gep.in
- %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1
+ %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %gep.out, align 4
ret void
@@ -338,14 +338,14 @@ define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(ptr addrspac
; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
; SI-NOT: v_cmp_class
; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) nounwind readnone
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -358,15 +358,15 @@ define amdgpu_kernel void @test_fold_or_class_f32_0(ptr addrspace(1) %out, ptr a
; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
; SI-NOT: v_cmp_class
; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or3_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fold_or3_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) nounwind readnone
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) nounwind readnone
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
@@ -381,22 +381,22 @@ define amdgpu_kernel void @test_fold_or3_class_f32_0(ptr addrspace(1) %out, ptr
; SI: v_cmp_class_f32_e64 s[0:1], v{{[0-9]+}}, [[MASK]]{{$}}
; SI-NOT: v_cmp_class
; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
- %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
- %class3 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
- %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) #1
- %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) #1
- %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
- %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) #1
- %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) #1
- %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) nounwind readnone
+ %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) nounwind readnone
+ %class3 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) nounwind readnone
+ %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) nounwind readnone
+ %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) nounwind readnone
+ %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) nounwind readnone
+ %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) nounwind readnone
+ %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) nounwind readnone
+ %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) nounwind readnone
%or.0 = or i1 %class0, %class1
%or.1 = or i1 %or.0, %class2
%or.2 = or i1 %or.1, %class3
@@ -416,14 +416,14 @@ define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(ptr addrspace(1) %
; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
; SI-NOT: v_cmp_class
; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fold_or_class_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) nounwind readnone
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -436,14 +436,14 @@ define amdgpu_kernel void @test_fold_or_class_f32_1(ptr addrspace(1) %out, ptr a
; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
; SI-NOT: v_cmp_class
; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_fold_or_class_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) nounwind readnone
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -456,14 +456,14 @@ define amdgpu_kernel void @test_fold_or_class_f32_2(ptr addrspace(1) %out, ptr a
; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
; SI: s_or_b64
; SI: s_endpgm
-define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
%a = load float, ptr addrspace(1) %gep.in
- %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
- %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1
+ %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) nounwind readnone
+ %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) nounwind readnone
%or = or i1 %class0, %class1
%sext = sext i1 %or to i32
@@ -476,8 +476,8 @@ define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, pt
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
+define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -488,8 +488,8 @@ define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) #0
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) #0 {
- %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
+define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -500,8 +500,8 @@ define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) #0
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a, i32 %b) #0 {
- %result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1
+define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a, i32 %b) nounwind {
+ %result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) nounwind readnone
%sext = sext i1 %result to i32
store i32 %sext, ptr addrspace(1) %out, align 4
ret void
@@ -513,7 +513,7 @@ define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a,
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, [[COND]]
; SI-NEXT: s_setpc_b64
define i1 @test_fold_and_ord(float %a) {
- %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) #1
+ %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) nounwind readnone
%ord = fcmp ord float %a, %a
%and = and i1 %ord, %class
ret i1 %and
@@ -525,7 +525,7 @@ define i1 @test_fold_and_ord(float %a) {
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, [[COND]]
; SI-NEXT: s_setpc_b64
define i1 @test_fold_and_unord(float %a) {
- %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) #1
+ %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) nounwind readnone
%ord = fcmp uno float %a, %a
%and = and i1 %ord, %class
ret i1 %and
@@ -537,12 +537,9 @@ define i1 @test_fold_and_unord(float %a) {
; SI: v_cmp_o
; SI: s_and_b64
define i1 @test_fold_and_ord_multi_use(float %a) {
- %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) #1
+ %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) nounwind readnone
store volatile i1 %class, ptr addrspace(1) undef
%ord = fcmp ord float %a, %a
%and = and i1 %ord, %class
ret i1 %and
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
index 39952d4cd396f5..f11b8e7b825455 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cos.f32(float) #0
+declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
; GCN-LABEL: {{^}}v_cos_f32:
; GCN: v_cos_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @v_cos_f32(ptr addrspace(1) %out, float %src) #1 {
- %cos = call float @llvm.amdgcn.cos.f32(float %src) #0
+define amdgpu_kernel void @v_cos_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %cos = call float @llvm.amdgcn.cos.f32(float %src) nounwind readnone
store float %cos, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
index c1e808cb3a0887..4ed5d615f52175 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
+declare float @llvm.amdgcn.cubeid(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}test_cubeid:
; GCN: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubeid(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubeid(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
store float %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
index 754f31c97e2324..8ffa9f2ce5849c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cubema(float, float, float) #0
+declare float @llvm.amdgcn.cubema(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}test_cubema:
; GCN: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubema(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubema(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.amdgcn.cubema(float %a, float %b, float %c)
store float %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
index 328665fe2c860c..c630f5f68e8bf6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cubesc(float, float, float) #0
+declare float @llvm.amdgcn.cubesc(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}test_cubesc:
; GCN: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubesc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubesc(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
store float %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
index 26af4117cbf908..aeb3776c71ad69 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.cubetc(float, float, float) #0
+declare float @llvm.amdgcn.cubetc(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}test_cubetc:
; GCN: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubetc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubetc(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
store float %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
index e21d61036375a1..2e7eda4444308b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
@@ -135,8 +135,5 @@ declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
-
-attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind readnone convergent }
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) nounwind readnone convergent
+declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) nounwind readnone convergent
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
index ca7385be5dee7b..003f6083c94c0a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
@@ -8,7 +8,7 @@
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pk_i16_i32(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_cvt_pk_i16_i32(ptr addrspace(1) %out, i32 %x, i32 %y) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %y)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -18,7 +18,7 @@ define amdgpu_kernel void @s_cvt_pk_i16_i32(ptr addrspace(1) %out, i32 %x, i32 %
; GCN-LABEL: {{^}}s_cvt_pk_i16_samereg_i32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(ptr addrspace(1) %out, i32 %x) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %x)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(ptr addrspace(1) %out, i32 %
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pk_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -47,7 +47,7 @@ define amdgpu_kernel void @v_cvt_pk_i16_i32(ptr addrspace(1) %out, ptr addrspace
; GCN-LABEL: {{^}}v_cvt_pk_i16_i32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
-define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -63,7 +63,7 @@ define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(ptr addrspace(1) %out, ptr a
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, 1, [[A]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, 1, [[A]]
-define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -75,9 +75,5 @@ define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(ptr addrspace(1) %out, ptr a
ret void
}
-declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
index b59e584418bd8e..da32558512161c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
@@ -8,7 +8,7 @@
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pk_u16_u32(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_cvt_pk_u16_u32(ptr addrspace(1) %out, i32 %x, i32 %y) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %y)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -18,7 +18,7 @@ define amdgpu_kernel void @s_cvt_pk_u16_u32(ptr addrspace(1) %out, i32 %x, i32 %
; GCN-LABEL: {{^}}s_cvt_pk_u16_samereg_i32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(ptr addrspace(1) %out, i32 %x) #0 {
+define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(ptr addrspace(1) %out, i32 %x) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %x)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(ptr addrspace(1) %out, i32 %
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pk_u16_u32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -47,7 +47,7 @@ define amdgpu_kernel void @v_cvt_pk_u16_u32(ptr addrspace(1) %out, ptr addrspace
; GCN-LABEL: {{^}}v_cvt_pk_u16_u32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
-define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -63,7 +63,7 @@ define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(ptr addrspace(1) %out, ptr a
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, 1, [[A]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, 1, [[A]]
-define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -75,9 +75,5 @@ define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(ptr addrspace(1) %out, ptr a
ret void
}
-declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
index 0093e30b036444..a339fb9405ad3c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
@@ -8,7 +8,7 @@
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pknorm_i16_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_i16_f32(ptr addrspace(1) %out, float %x, float %y) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %y)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -18,7 +18,7 @@ define amdgpu_kernel void @s_cvt_pknorm_i16_f32(ptr addrspace(1) %out, float %x,
; GCN-LABEL: {{^}}s_cvt_pknorm_i16_samereg_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(ptr addrspace(1) %out, float %x) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %x)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(ptr addrspace(1) %out, f
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -47,7 +47,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -63,7 +63,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, 1.0, [[A]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -79,7 +79,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -117,7 +117,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -137,7 +137,7 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(ptr addrspace(1) %out
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -154,10 +154,6 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(ptr addrspa
ret void
}
-declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
index d896090a476651..b6d93b930ed3fc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
@@ -8,7 +8,7 @@
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pknorm_u16_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_u16_f32(ptr addrspace(1) %out, float %x, float %y) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %y)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -18,7 +18,7 @@ define amdgpu_kernel void @s_cvt_pknorm_u16_f32(ptr addrspace(1) %out, float %x,
; GCN-LABEL: {{^}}s_cvt_pknorm_u16_samereg_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(ptr addrspace(1) %out, float %x) nounwind {
%result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %x)
%r = bitcast <2 x i16> %result to i32
store i32 %r, ptr addrspace(1) %out
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(ptr addrspace(1) %out, f
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -47,7 +47,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -63,7 +63,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, 1.0, [[A]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -79,7 +79,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -117,7 +117,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(ptr addrspace(1) %out, p
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -137,7 +137,7 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(ptr addrspace(1) %out
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
-define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
@@ -154,10 +154,6 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(ptr addrspa
ret void
}
-declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
index 920ff8a927e2d1..de909e3f16a9ee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX10
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11
-define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, float %x, float %y) nounwind {
; SI-LABEL: s_cvt_pkrtz_v2f16_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -64,7 +64,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, float %x
ret void
}
-define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(ptr addrspace(1) %out, float %x) nounwind {
; SI-LABEL: s_cvt_pkrtz_samereg_v2f16_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -125,7 +125,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: s_cvt_pkrtz_undef_undef:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
@@ -142,7 +142,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(ptr addrspace(1) %out) #0 {
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -243,7 +243,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, ptr addr
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_reg_imm:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -321,7 +321,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_imm_reg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -399,7 +399,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -501,7 +501,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_hi:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -603,7 +603,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -706,7 +706,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -810,10 +810,6 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(ptr addrsp
ret void
}
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
index f8a1388c9415e7..16f61746b98928 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
@@ -1,7 +1,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s
-declare i64 @llvm.amdgcn.dispatch.id() #1
+declare i64 @llvm.amdgcn.dispatch.id() nounwind readnone
; GCN-LABEL: {{^}}dispatch_id:
@@ -9,14 +9,11 @@ declare i64 @llvm.amdgcn.dispatch.id() #1
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
-define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) nounwind {
%tmp0 = call i64 @llvm.amdgcn.dispatch.id()
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
index 4fe6eed0ef1f32..df95446561be15 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
@@ -7,7 +7,7 @@
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() readnone
%value = load i32, ptr addrspace(4) %dispatch_ptr
store i32 %value, ptr addrspace(1) %out
ret void
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
; GCN: s_endpgm
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
- %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+ %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() readnone
%d1 = getelementptr inbounds i8, ptr addrspace(4) %dispatch_ptr, i64 6
%v1 = load i16, ptr addrspace(4) %d1
%e1 = zext i16 %v1 to i32
@@ -28,9 +28,7 @@ define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
ret void
}
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
-attributes #0 = { readnone }
+declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index 300c0032ac9209..f8b81c620920b2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
-declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
-declare float @llvm.fabs.f32(float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone speculatable
+declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone speculatable
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable
; SI-LABEL: {{^}}test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
@@ -412,7 +412,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, pt
; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) nounwind {
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
@@ -422,7 +422,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %ou
; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}}
-define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) nounwind {
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
@@ -432,7 +432,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %ou
; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
; SI-NOT: v0
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
-define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) nounwind {
%result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
@@ -443,12 +443,9 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]], v[0:1], s[[[K_LO]]:[[K_HI]]]
-define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) nounwind {
%result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
%result0 = extractvalue { double, i1 } %result, 0
store double %result0, ptr addrspace(1) %out, align 8
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
index 1a42145ab1d81a..ddd0eb851b2a0a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
@@ -13,7 +13,7 @@
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -25,7 +25,7 @@ define amdgpu_kernel void @ds_append_lds(ptr addrspace(3) %lds, ptr addrspace(1)
; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -44,7 +44,7 @@ define amdgpu_kernel void @ds_append_lds_max_offset(ptr addrspace(3) %lds, ptr a
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) nounwind {
%lds = load ptr addrspace(3), ptr addrspace(4) %lds.ptr, align 4
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 4
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false)
@@ -65,7 +65,7 @@ define amdgpu_kernel void @ds_append_no_fold_offset_si(ptr addrspace(4) %lds.ptr
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16384
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -81,7 +81,7 @@ define amdgpu_kernel void @ds_append_lds_over_max_offset(ptr addrspace(3) %lds,
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define void @ds_append_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define void @ds_append_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -93,7 +93,7 @@ define void @ds_append_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %ou
; GCN: ds_append [[RESULT:v[0-9]+]] gds{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -105,7 +105,7 @@ define amdgpu_kernel void @ds_append_gds(ptr addrspace(2) %gds, ptr addrspace(1)
; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532 gds{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_append_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16383
%val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -114,7 +114,7 @@ define amdgpu_kernel void @ds_append_gds_max_offset(ptr addrspace(2) %gds, ptr a
; GCN-LABEL: {{^}}ds_append_gds_over_max_offset:
; GCN-NOT: buffer_wbinvl1
-define amdgpu_kernel void @ds_append_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16384
%val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -130,7 +130,7 @@ define amdgpu_kernel void @ds_append_gds_over_max_offset(ptr addrspace(2) %gds,
; GFX9-NOT: m0
; GCN: _store_dword
; GCN: ds_read_b32
-define amdgpu_kernel void @ds_append_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val0 = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val0, ptr addrspace(1) %out
%val1 = load volatile i32, ptr addrspace(3) %lds
@@ -142,14 +142,11 @@ define amdgpu_kernel void @ds_append_lds_m0_restore(ptr addrspace(3) %lds, ptr a
; GCN: s_load_dword [[PTR:s[0-9]+]]
; GCN: s_mov_b32 m0, [[PTR]]
; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}}
-define amdgpu_kernel void @ds_append_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_append_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383
%val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false)
ret void
}
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #1
-declare i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) nocapture, i1 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly convergent nounwind }
+declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) argmemonly convergent nounwind
+declare i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) nocapture, i1 immarg) argmemonly convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
index 90e18a881340b3..827fde106ae56f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
@@ -1,11 +1,11 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
-declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0
+declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}ds_bpermute:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_bpermute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
- %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,7 +14,7 @@ define amdgpu_kernel void @ds_bpermute(ptr addrspace(1) %out, i32 %index, i32 %s
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
define amdgpu_kernel void @ds_bpermute_imm_offset(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%index = add i32 %base_index, 4
- %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
@@ -22,7 +22,7 @@ define amdgpu_kernel void @ds_bpermute_imm_offset(ptr addrspace(1) %out, i32 %ba
; CHECK-LABEL: {{^}}ds_bpermute_imm_index:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64
define amdgpu_kernel void @ds_bpermute_imm_index(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
- %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
@@ -33,7 +33,7 @@ define amdgpu_kernel void @ds_bpermute_imm_index(ptr addrspace(1) %out, i32 %bas
define void @ds_bpermute_add_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%index = add i32 %base_index, 1
%byte_index = shl i32 %index, 2
- %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
@@ -45,9 +45,7 @@ define void @ds_bpermute_or_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src
%masked = and i32 %base_index, 62
%index = or i32 %masked, 1
%byte_index = shl i32 %index, 2
- %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
index 2694226ace9e7b..271cb5c8532dd2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
@@ -13,7 +13,7 @@
; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -25,7 +25,7 @@ define amdgpu_kernel void @ds_consume_lds(ptr addrspace(3) %lds, ptr addrspace(1
; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -44,7 +44,7 @@ define amdgpu_kernel void @ds_consume_lds_max_offset(ptr addrspace(3) %lds, ptr
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) nounwind {
%lds = load ptr addrspace(3), ptr addrspace(4) %lds.ptr, align 4
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 4
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false)
@@ -63,7 +63,7 @@ define amdgpu_kernel void @ds_consume_no_fold_offset_si(ptr addrspace(4) %lds.pt
; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16384
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -77,7 +77,7 @@ define amdgpu_kernel void @ds_consume_lds_over_max_offset(ptr addrspace(3) %lds,
; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define void @ds_consume_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define void @ds_consume_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -89,7 +89,7 @@ define void @ds_consume_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %o
; GCN: ds_consume [[RESULT:v[0-9]+]] gds{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gds, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @ds_consume_gds(ptr addrspace(2) %gds, ptr addrspace(1
; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532 gds{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
-define amdgpu_kernel void @ds_consume_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16383
%val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -110,7 +110,7 @@ define amdgpu_kernel void @ds_consume_gds_max_offset(ptr addrspace(2) %gds, ptr
; GCN-LABEL: {{^}}ds_consume_gds_over_max_offset:
; GCN-NOT: buffer_wbinvl1
-define amdgpu_kernel void @ds_consume_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16384
%val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gep, i1 false)
store i32 %val, ptr addrspace(1) %out
@@ -126,7 +126,7 @@ define amdgpu_kernel void @ds_consume_gds_over_max_offset(ptr addrspace(2) %gds,
; GFX9-NOT: m0
; GCN: _store_dword
; GCN: ds_read_b32
-define amdgpu_kernel void @ds_consume_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%val0 = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false)
store i32 %val0, ptr addrspace(1) %out
%val1 = load volatile i32, ptr addrspace(3) %lds
@@ -138,14 +138,11 @@ define amdgpu_kernel void @ds_consume_lds_m0_restore(ptr addrspace(3) %lds, ptr
; GCN: s_load_dword [[PTR:s[0-9]+]]
; GCN: s_mov_b32 m0, [[PTR]]
; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532{{$}}
-define amdgpu_kernel void @ds_consume_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @ds_consume_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383
%val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false)
ret void
}
-declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #1
-declare i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) nocapture, i1 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly convergent nounwind }
+declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) argmemonly convergent nounwind
+declare i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) nocapture, i1 immarg) argmemonly convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
index 95e50da8a4709b..0353d008c52b3b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
@@ -7,13 +7,10 @@
; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource")
; MIR-NEXT: S_WAITCNT 0
; MIR-NEXT: }
-define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_offset0(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
ret void
}
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) convergent inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
index ad5e9f4eb6a632..c4983a99b918fa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
@@ -39,7 +39,7 @@
; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource")
; MIR-NEXT: S_WAITCNT 0
; MIR-NEXT: }
-define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_offset0(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
ret void
}
@@ -52,7 +52,7 @@ define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_barrier v0 offset:63 gds{{$}}
-define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_offset63(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 63)
ret void
}
@@ -69,7 +69,7 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
-define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
ret void
}
@@ -85,7 +85,7 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:1 gds{{$}}
-define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
+define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) nounwind {
%offset = add i32 %offset.base, 1
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
ret void
@@ -102,7 +102,7 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
-define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) nounwind {
%vgpr.offset = call i32 @llvm.amdgcn.workitem.id.x()
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %vgpr.offset)
ret void
@@ -120,7 +120,7 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:3 gds{{$}}
-define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) nounwind {
%vgpr.offset.base = call i32 @llvm.amdgcn.workitem.id.x()
%vgpr.offset = add i32 %vgpr.offset.base, 3
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %vgpr.offset)
@@ -143,7 +143,7 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {
; LOOP: s_mov_b32 m0, -1
; LOOP: ds_write_b32
-define amdgpu_kernel void @gws_barrier_save_m0_barrier_constant_offset(i32 %val) #0 {
+define amdgpu_kernel void @gws_barrier_save_m0_barrier_constant_offset(i32 %val) nounwind {
store i32 1, ptr addrspace(3) @lds
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 10)
store i32 2, ptr addrspace(3) @lds
@@ -165,7 +165,7 @@ define void @gws_barrier_lgkmcnt(i32 %val) {
; GCN-LABEL: {{^}}gws_barrier_wait_before:
; NOLOOP: s_waitcnt
; NOLOOP-NOT: s_waitcnt{{$}}
-define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, ptr addrspace(1) %ptr) nounwind {
store i32 0, ptr addrspace(1) %ptr
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
ret void
@@ -176,7 +176,7 @@ define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, ptr addrspace(1) %p
; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOLOOP: load_{{dword|b32}}
-define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, ptr addrspace(1) %ptr) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
%load = load volatile i32, ptr addrspace(1) %ptr
ret void
@@ -190,7 +190,7 @@ define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, ptr addrspace(1) %pt
; GFX10: s_waitcnt_vscnt null, 0x0
; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, ptr addrspace(1) %ptr) nounwind {
store i32 0, ptr addrspace(1) %ptr
fence release
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
@@ -204,7 +204,7 @@ define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, ptr addrspace(1) %
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOLOOP-NEXT: load_{{dword|b32}}
-define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, ptr addrspace(1) %ptr) nounwind {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
fence release
%load = load volatile i32, ptr addrspace(1) %ptr
@@ -218,7 +218,7 @@ define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, ptr addrspace(1) %p
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOLOOP-NEXT: ds_gws_barrier v0 offset:7 gds
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-define amdgpu_kernel void @gws_init_barrier(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_barrier(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
ret void
@@ -231,18 +231,13 @@ define amdgpu_kernel void @gws_init_barrier(i32 %val) #0 {
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOLOOP-NEXT: ds_gws_barrier v0 offset:7 gds
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
fence release
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
ret void
}
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare i32 @llvm.amdgcn.workitem.id.x() #3
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent inaccessiblememonly nounwind }
-attributes #2 = { convergent inaccessiblememonly nounwind writeonly }
-attributes #3 = { nounwind readnone speculatable }
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) convergent inaccessiblememonly nounwind
+declare void @llvm.amdgcn.ds.gws.init(i32, i32) convergent inaccessiblememonly nounwind writeonly
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
index f658ab39f771fb..8d31c490fe5858 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
@@ -25,7 +25,7 @@
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_offset0(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0)
ret void
}
@@ -46,7 +46,7 @@ define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_offset63(i32 %val) nounwind {
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 63)
ret void
}
@@ -62,7 +62,7 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_init [[GWS_VAL]] gds{{$}}
-define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) nounwind {
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
ret void
}
@@ -78,7 +78,7 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
; NOLOOP: ds_gws_init [[GWS_VAL]] offset:1 gds{{$}}
-define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
+define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) nounwind {
%offset = add i32 %offset.base, 1
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
ret void
@@ -95,7 +95,7 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base)
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_init v0 gds{{$}}
-define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) nounwind {
%vgpr.offset = call i32 @llvm.amdgcn.workitem.id.x()
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset)
ret void
@@ -113,7 +113,7 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_init v0 offset:3 gds{{$}}
-define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) nounwind {
%vgpr.offset.base = call i32 @llvm.amdgcn.workitem.id.x()
%vgpr.offset = add i32 %vgpr.offset.base, 3
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset)
@@ -136,7 +136,7 @@ define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
; LOOP: s_mov_b32 m0, -1
; LOOP: ds_write_b32
-define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) #0 {
+define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) nounwind {
store volatile i32 1, ptr addrspace(3) @lds
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 10)
store i32 2, ptr addrspace(3) @lds
@@ -159,15 +159,11 @@ define void @gws_init_lgkmcnt(i32 %val) {
; NOLOOP-NOT: s_waitcnt
; NOLOOP: ds_gws_init
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-define amdgpu_kernel void @gws_init_wait_before(i32 %val, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @gws_init_wait_before(i32 %val, ptr addrspace(1) %ptr) nounwind {
store i32 0, ptr addrspace(1) %ptr
call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
ret void
}
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent inaccessiblememonly nounwind writeonly }
-attributes #2 = { nounwind readnone speculatable }
+declare void @llvm.amdgcn.ds.gws.init(i32, i32) convergent inaccessiblememonly nounwind writeonly
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
index da64f7350a921f..09700ae015bfc1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
@@ -25,11 +25,9 @@
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_sema_br_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_sema_br_offset0(i32 %val) convergent inaccessiblememonly nounwind {
call void @llvm.amdgcn.ds.gws.sema.br(i32 %val, i32 0)
ret void
}
-declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32) #0
-
-attributes #0 = { convergent inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32) convergent inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
index 180ea84dd7a74e..0a267e0630d332 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
@@ -17,11 +17,9 @@
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_sema_p_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_sema_p_offset0(i32 %val) convergent inaccessiblememonly nounwind {
call void @llvm.amdgcn.ds.gws.sema.p(i32 0)
ret void
}
-declare void @llvm.amdgcn.ds.gws.sema.p(i32) #0
-
-attributes #0 = { convergent inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.ds.gws.sema.p(i32) convergent inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
index 818e23b5649d58..598aea8357cfab 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -27,11 +27,9 @@
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_sema_release_all_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_sema_release_all_offset0(i32 %val) convergent inaccessiblememonly nounwind {
call void @llvm.amdgcn.ds.gws.sema.release.all(i32 0)
ret void
}
-declare void @llvm.amdgcn.ds.gws.sema.release.all(i32) #0
-
-attributes #0 = { convergent inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.ds.gws.sema.release.all(i32) convergent inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
index 215c394409ac00..f1bb6ba5502fcf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
@@ -23,11 +23,9 @@
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
-define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) #0 {
+define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) convergent inaccessiblememonly nounwind {
call void @llvm.amdgcn.ds.gws.sema.v(i32 0)
ret void
}
-declare void @llvm.amdgcn.ds.gws.sema.v(i32) #0
-
-attributes #0 = { convergent inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.ds.gws.sema.v(i32) convergent inaccessiblememonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
index 6581e251b416a1..e1ef2fe395b6b8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
@@ -1,11 +1,11 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
-declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0
+declare i32 @llvm.amdgcn.ds.permute(i32, i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}ds_permute:
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_permute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
- %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,9 +14,7 @@ define amdgpu_kernel void @ds_permute(ptr addrspace(1) %out, i32 %index, i32 %sr
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
define amdgpu_kernel void @ds_permute_imm_offset(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%index = add i32 %base_index, 4
- %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
+ %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) nounwind readnone convergent
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
index 038ba91c0d11bd..b121de81d5e024 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
@@ -1,14 +1,12 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
-declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}ds_swizzle:
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) nounwind {
- %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
+ %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) nounwind readnone convergent
store i32 %swizzle, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
index 3dbda351b5a173..2dee6dca37817f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
@@ -5,13 +5,13 @@
; ERR: error: <unknown>:0:0: in function test_export_compr_zeroes_v2f16 void (): intrinsic not supported on subtarget
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) nounwind
; GCN-LABEL: {{^}}test_export_compr_zeroes_v2f16:
; GCN: exp mrt0 off, off, off, off compr{{$}}
; GCN: exp mrt0 off, off, off, off done compr{{$}}
-define amdgpu_kernel void @test_export_compr_zeroes_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_zeroes_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 false)
ret void
@@ -21,7 +21,7 @@ define amdgpu_kernel void @test_export_compr_zeroes_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x40003c00
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 [[SRC0]], [[SRC0]], off, off done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src0_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src0_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
@@ -30,7 +30,7 @@ define amdgpu_kernel void @test_export_compr_en_src0_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x40003c00
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 off, off, [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src1_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src1_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
@@ -39,7 +39,7 @@ define amdgpu_kernel void @test_export_compr_en_src1_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x40003c00
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src0_src1_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src0_src1_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
@@ -48,7 +48,7 @@ define amdgpu_kernel void @test_export_compr_en_src0_src1_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x40003c00
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 off, [[SRC0]], off, off done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_invalid2_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_en_invalid2_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
@@ -57,7 +57,7 @@ define amdgpu_kernel void @test_export_compr_en_invalid2_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x40003c00
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 off, [[SRC0]], off, [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_invalid10_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_en_invalid10_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 10, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
@@ -66,7 +66,7 @@ define amdgpu_kernel void @test_export_compr_en_invalid10_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0x38003800
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] compr{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_mrt7_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_mrt7_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 7, i32 15, <2 x half> <half 0.5, half 0.5>, <2 x half> <half 0.5, half 0.5>, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 7, i32 15, <2 x half> <half 0.5, half 0.5>, <2 x half> <half 0.5, half 0.5>, i1 true, i1 false)
ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @test_export_compr_mrt7_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrtz [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] compr{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_z_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_z_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 8, i32 15, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 8, i32 15, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_export_compr_z_v2f16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x44003800
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] compr vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr vm{{$}}
-define amdgpu_kernel void @test_export_compr_vm_v2f16() #0 {
+define amdgpu_kernel void @test_export_compr_vm_v2f16() nounwind {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 true)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 true)
ret void
@@ -97,7 +97,7 @@ define amdgpu_kernel void @test_export_compr_vm_v2f16() #0 {
; GCN-LABEL: {{^}}test_export_compr_zeroes_v2i16:
; GCN: exp mrt0 off, off, off, off compr{{$}}
; GCN: exp mrt0 off, off, off, off done compr{{$}}
-define amdgpu_kernel void @test_export_compr_zeroes_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_zeroes_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 0, <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 0, <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, i1 true, i1 false)
ret void
@@ -107,7 +107,7 @@ define amdgpu_kernel void @test_export_compr_zeroes_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x20001
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x40005
; GCN: exp mrt0 [[SRC0]], off, off, off done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src0_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src0_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 1, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 true, i1 false)
ret void
}
@@ -116,7 +116,7 @@ define amdgpu_kernel void @test_export_compr_en_src0_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x20001
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x40005
; GCN: exp mrt0 off, off, [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src1_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src1_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 12, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 true, i1 false)
ret void
}
@@ -125,7 +125,7 @@ define amdgpu_kernel void @test_export_compr_en_src1_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 0x20001
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x40005
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_en_src0_src1_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_en_src0_src1_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 true, i1 false)
ret void
}
@@ -134,7 +134,7 @@ define amdgpu_kernel void @test_export_compr_en_src0_src1_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[VI16:v[0-9]+]], 0x50005
; GCN: exp mrt7 [[VI16]], [[VI16]], [[VI16]], [[VI16]] compr{{$}}
; GCN: exp mrt7 [[VI16]], [[VI16]], [[VI16]], [[VI16]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_mrt7_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_mrt7_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 7, i32 15, <2 x i16> <i16 5, i16 5>, <2 x i16> <i16 5, i16 5>, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2i16(i32 7, i32 15, <2 x i16> <i16 5, i16 5>, <2 x i16> <i16 5, i16 5>, i1 true, i1 false)
ret void
@@ -145,7 +145,7 @@ define amdgpu_kernel void @test_export_compr_mrt7_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x40005
; GCN: exp mrtz [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] compr{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr{{$}}
-define amdgpu_kernel void @test_export_compr_z_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_z_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 8, i32 15, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 false, i1 false)
call void @llvm.amdgcn.exp.compr.v2i16(i32 8, i32 15, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 true, i1 false)
ret void
@@ -156,10 +156,8 @@ define amdgpu_kernel void @test_export_compr_z_v2i16() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 0x40005
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] compr vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC0]], [[SRC1]], [[SRC1]] done compr vm{{$}}
-define amdgpu_kernel void @test_export_compr_vm_v2i16() #0 {
+define amdgpu_kernel void @test_export_compr_vm_v2i16() nounwind {
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 false, i1 true)
call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 15, <2 x i16> <i16 1, i16 2>, <2 x i16> <i16 5, i16 4>, i1 true, i1 true)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 224de9512c493f..a84c6b8caaf68c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -2,14 +2,14 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) nounwind inaccessiblememonly
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}test_export_zeroes_f32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
-define amdgpu_kernel void @test_export_zeroes_f32() #0 {
+define amdgpu_kernel void @test_export_zeroes_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
@@ -24,7 +24,7 @@ define amdgpu_kernel void @test_export_zeroes_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_f32() #0 {
+define amdgpu_kernel void @test_export_en_src0_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -35,7 +35,7 @@ define amdgpu_kernel void @test_export_en_src0_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src1_f32() #0 {
+define amdgpu_kernel void @test_export_en_src1_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_export_en_src1_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
-define amdgpu_kernel void @test_export_en_src2_f32() #0 {
+define amdgpu_kernel void @test_export_en_src2_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -57,7 +57,7 @@ define amdgpu_kernel void @test_export_en_src2_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src3_f32() #0 {
+define amdgpu_kernel void @test_export_en_src3_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -68,7 +68,7 @@ define amdgpu_kernel void @test_export_en_src3_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src1_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -79,7 +79,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src2_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
@@ -91,7 +91,7 @@ define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src3_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -104,7 +104,7 @@ define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -114,7 +114,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
-define amdgpu_kernel void @test_export_mrt7_f32() #0 {
+define amdgpu_kernel void @test_export_mrt7_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
ret void
@@ -127,7 +127,7 @@ define amdgpu_kernel void @test_export_mrt7_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_z_f32() #0 {
+define amdgpu_kernel void @test_export_z_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -140,7 +140,7 @@ define amdgpu_kernel void @test_export_z_f32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_null_f32() #0 {
+define amdgpu_kernel void @test_export_null_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -153,7 +153,7 @@ define amdgpu_kernel void @test_export_null_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_reserved10_f32() #0 {
+define amdgpu_kernel void @test_export_reserved10_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -166,7 +166,7 @@ define amdgpu_kernel void @test_export_reserved10_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_reserved11_f32() #0 {
+define amdgpu_kernel void @test_export_reserved11_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -179,7 +179,7 @@ define amdgpu_kernel void @test_export_reserved11_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_pos0_f32() #0 {
+define amdgpu_kernel void @test_export_pos0_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -192,7 +192,7 @@ define amdgpu_kernel void @test_export_pos0_f32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_pos3_f32() #0 {
+define amdgpu_kernel void @test_export_pos3_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -205,7 +205,7 @@ define amdgpu_kernel void @test_export_pos3_f32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_param0_f32() #0 {
+define amdgpu_kernel void @test_export_param0_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -218,7 +218,7 @@ define amdgpu_kernel void @test_export_param0_f32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_param31_f32() #0 {
+define amdgpu_kernel void @test_export_param31_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
@@ -233,7 +233,7 @@ define amdgpu_kernel void @test_export_param31_f32() #0 {
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_vm_f32() #0 {
+define amdgpu_kernel void @test_export_vm_f32() nounwind {
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
ret void
@@ -256,7 +256,7 @@ define amdgpu_kernel void @test_export_vm_f32() #0 {
; GCN-LABEL: {{^}}test_export_zeroes_i32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
-define amdgpu_kernel void @test_export_zeroes_i32() #0 {
+define amdgpu_kernel void @test_export_zeroes_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
@@ -271,7 +271,7 @@ define amdgpu_kernel void @test_export_zeroes_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_i32() #0 {
+define amdgpu_kernel void @test_export_en_src0_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -282,7 +282,7 @@ define amdgpu_kernel void @test_export_en_src0_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src1_i32() #0 {
+define amdgpu_kernel void @test_export_en_src1_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -293,7 +293,7 @@ define amdgpu_kernel void @test_export_en_src1_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
-define amdgpu_kernel void @test_export_en_src2_i32() #0 {
+define amdgpu_kernel void @test_export_en_src2_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -304,7 +304,7 @@ define amdgpu_kernel void @test_export_en_src2_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src3_i32() #0 {
+define amdgpu_kernel void @test_export_en_src3_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -315,7 +315,7 @@ define amdgpu_kernel void @test_export_en_src3_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src1_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -326,7 +326,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src2_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
}
@@ -338,7 +338,7 @@ define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src3_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -351,7 +351,7 @@ define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
+define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -361,7 +361,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 5
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
-define amdgpu_kernel void @test_export_mrt7_i32() #0 {
+define amdgpu_kernel void @test_export_mrt7_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
ret void
@@ -374,7 +374,7 @@ define amdgpu_kernel void @test_export_mrt7_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_z_i32() #0 {
+define amdgpu_kernel void @test_export_z_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_export_z_i32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_null_i32() #0 {
+define amdgpu_kernel void @test_export_null_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -400,7 +400,7 @@ define amdgpu_kernel void @test_export_null_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_reserved10_i32() #0 {
+define amdgpu_kernel void @test_export_reserved10_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -413,7 +413,7 @@ define amdgpu_kernel void @test_export_reserved10_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_reserved11_i32() #0 {
+define amdgpu_kernel void @test_export_reserved11_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -426,7 +426,7 @@ define amdgpu_kernel void @test_export_reserved11_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_pos0_i32() #0 {
+define amdgpu_kernel void @test_export_pos0_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -439,7 +439,7 @@ define amdgpu_kernel void @test_export_pos0_i32() #0 {
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_pos3_i32() #0 {
+define amdgpu_kernel void @test_export_pos3_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -452,7 +452,7 @@ define amdgpu_kernel void @test_export_pos3_i32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_param0_i32() #0 {
+define amdgpu_kernel void @test_export_param0_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -465,7 +465,7 @@ define amdgpu_kernel void @test_export_param0_i32() #0 {
; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_param31_i32() #0 {
+define amdgpu_kernel void @test_export_param31_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
ret void
@@ -480,7 +480,7 @@ define amdgpu_kernel void @test_export_param31_i32() #0 {
; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
-define amdgpu_kernel void @test_export_vm_i32() #0 {
+define amdgpu_kernel void @test_export_vm_i32() nounwind {
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
ret void
@@ -489,7 +489,7 @@ define amdgpu_kernel void @test_export_vm_i32() #0 {
; GCN-LABEL: {{^}}test_if_export_f32:
; GCN: s_cbranch_execz
; GCN: exp
-define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %end, label %exp
@@ -504,7 +504,7 @@ end:
; GCN-LABEL: {{^}}test_if_export_vm_f32:
; GCN: s_cbranch_execz
; GCN: exp
-define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %end, label %exp
@@ -519,7 +519,7 @@ end:
; GCN-LABEL: {{^}}test_if_export_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
-define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %end, label %exp
@@ -534,7 +534,7 @@ end:
; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
-define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %end, label %exp
@@ -555,7 +555,7 @@ end:
; PREGFX11-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
; PREGFX11: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
; PREGFX11-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
-define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
+define amdgpu_kernel void @test_export_clustering(float %x, float %y) nounwind {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
%z1 = fsub float %y, %x
@@ -567,7 +567,7 @@ define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
; PREGFX11: exp pos0
; PREGFX11-NOT: s_waitcnt
; PREGFX11: exp param0
-define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
+define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) nounwind {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
%z1 = fsub float %y, %x
@@ -579,7 +579,7 @@ define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
; GFX10: exp pos4
; GFX10-NOT: s_waitcnt
; GFX10: exp param0
-define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
+define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) nounwind {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
%z1 = fsub float %y, %x
@@ -595,7 +595,7 @@ define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0
; PREGFX11: exp param0
; PREGFX11: exp param1
; PREGFX11: exp param2
-define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
+define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) nounwind {
%z0 = fadd float %x, %y
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
@@ -611,7 +611,7 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float
; PREGFX11: exp pos0
; PREGFX11-NEXT: exp param0
; PREGFX11-NEXT: exp param1
-define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
+define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) nounwind {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
%load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
@@ -625,7 +625,7 @@ define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0
; PREGFX11: exp pos0
; PREGFX11: exp param0
; PREGFX11: exp param1
-define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
+define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) nounwind {
%data0 = alloca <4 x float>, align 8, addrspace(5)
%data1 = alloca <4 x float>, align 8, addrspace(5)
%cmp = icmp eq i32 %idx, 1
@@ -637,7 +637,3 @@ define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind inaccessiblememonly }
-attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
index 52441bcb82f5c2..a1902667363f1e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
@@ -2,15 +2,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
-declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) nounwind inaccessiblememonly
; GCN-LABEL: {{^}}test_export_prim_i32:
; NOPRIM: exp invalid_target_20 v0, off, off, off done{{$}}
; PRIM: exp prim v0, off, off, off done{{$}}
-define amdgpu_gs void @test_export_prim_i32(i32 inreg %a) #0 {
+define amdgpu_gs void @test_export_prim_i32(i32 inreg %a) nounwind {
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %a, i32 undef, i32 undef, i32 undef, i1 true, i1 false)
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind inaccessiblememonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
index 99a092e310abbd..1fcfce184de3e5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
@@ -68,12 +68,10 @@ define half @v_fneg_fabs_exp2_f16(half %src) {
ret half %exp2
}
-declare half @llvm.amdgcn.exp2.f16(half) #0
-declare float @llvm.amdgcn.exp2.f32(float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.amdgcn.exp2.f16(half) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.exp2.f32(float) nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable willreturn
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GISEL: {{.*}}
; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 17b941c59fd3f3..a0ae302e3cf8f2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -5,12 +5,12 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
-declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
-declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
-declare float @llvm.fabs.f32(float) #0
+declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
+declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) nounwind readnone convergent
+declare float @llvm.fabs.f32(float) nounwind readnone convergent
-declare i32 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
-declare half @llvm.fabs.f16(half) #0
+declare i32 @llvm.amdgcn.fcmp.f16(half, half, i32) nounwind readnone convergent
+declare half @llvm.fabs.f16(half) nounwind readnone convergent
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
@@ -2640,5 +2640,3 @@ define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
store i32 %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index ce055d65279966..64c2c43ddd424a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -6,12 +6,12 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL %s
-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
-declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
-declare float @llvm.fabs.f32(float) #0
+declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
+declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) nounwind readnone convergent
+declare float @llvm.fabs.f32(float) nounwind readnone convergent
-declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
-declare half @llvm.fabs.f16(half) #0
+declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) nounwind readnone convergent
+declare half @llvm.fabs.f16(half) nounwind readnone convergent
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
; GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
@@ -2943,5 +2943,3 @@ define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
store i64 %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
index 212c286b3fe578..202cb52f2584a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s
-declare float @llvm.amdgcn.fdiv.fast(float, float) #0
+declare float @llvm.amdgcn.fdiv.fast(float, float) nounwind readnone
; CHECK-LABEL: {{^}}test_fdiv_fast:
; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
@@ -8,11 +8,8 @@ declare float @llvm.amdgcn.fdiv.fast(float, float) #0
; CHECK: v_rcp_f32_e32
; CHECK: v_mul_f32_e32
; CHECK: v_mul_f32_e32
-define amdgpu_kernel void @test_fdiv_fast(ptr addrspace(1) %out, float %a, float %b) #1 {
+define amdgpu_kernel void @test_fdiv_fast(ptr addrspace(1) %out, float %a, float %b) nounwind {
%fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
store float %fdiv, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
index 38608387b3a06d..c6e7d3c97ba13b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
@@ -2,7 +2,7 @@
; GCN-LABEL: {{^}}test_fmed3_f16:
; GCN: v_med3_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_fmed3_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
+define amdgpu_kernel void @test_fmed3_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) nounwind {
%src0.f16 = trunc i32 %src0.arg to i16
%src0 = bitcast i16 %src0.f16 to half
%src1.f16 = trunc i32 %src1.arg to i16
@@ -16,7 +16,7 @@ define amdgpu_kernel void @test_fmed3_f16(ptr addrspace(1) %out, i32 %src0.arg,
; GCN-LABEL: {{^}}test_fmed3_srcmods_f16:
; GCN: v_med3_f16 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
-define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
+define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) nounwind {
%src0.f16 = trunc i32 %src0.arg to i16
%src0 = bitcast i16 %src0.f16 to half
%src1.f16 = trunc i32 %src1.arg to i16
@@ -32,8 +32,5 @@ define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %sr
ret void
}
-declare half @llvm.amdgcn.fmed3.f16(half, half, half) #0
-declare half @llvm.fabs.f16(half) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare half @llvm.amdgcn.fmed3.f16(half, half, half) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
index 588b8c35a56ace..f83f0211164f09 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}test_fmed3:
; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
+define amdgpu_kernel void @test_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
store float %med3, ptr addrspace(1) %out
ret void
@@ -11,7 +11,7 @@ define amdgpu_kernel void @test_fmed3(ptr addrspace(1) %out, float %src0, float
; GCN-LABEL: {{^}}test_fmed3_srcmods:
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
-define amdgpu_kernel void @test_fmed3_srcmods(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
+define amdgpu_kernel void @test_fmed3_srcmods(ptr addrspace(1) %out, float %src0, float %src1, float %src2) nounwind {
%src0.fneg = fsub float -0.0, %src0
%src1.fabs = call float @llvm.fabs.f32(float %src1)
%src2.fabs = call float @llvm.fabs.f32(float %src2)
@@ -23,7 +23,7 @@ define amdgpu_kernel void @test_fmed3_srcmods(ptr addrspace(1) %out, float %src0
; GCN-LABEL: {{^}}test_fneg_fmed3:
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
-define amdgpu_kernel void @test_fneg_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
+define amdgpu_kernel void @test_fneg_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
%neg.med3 = fsub float -0.0, %med3
store float %neg.med3, ptr addrspace(1) %out
@@ -33,7 +33,7 @@ define amdgpu_kernel void @test_fneg_fmed3(ptr addrspace(1) %out, float %src0, f
; GCN-LABEL: {{^}}test_fneg_fmed3_multi_use:
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, -4.0, [[MED3]]
-define amdgpu_kernel void @test_fneg_fmed3_multi_use(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
+define amdgpu_kernel void @test_fneg_fmed3_multi_use(ptr addrspace(1) %out, float %src0, float %src1, float %src2) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
%neg.med3 = fsub float -0.0, %med3
%med3.user = fmul float %med3, 4.0
@@ -45,7 +45,7 @@ define amdgpu_kernel void @test_fneg_fmed3_multi_use(ptr addrspace(1) %out, floa
; GCN-LABEL: {{^}}test_fabs_fmed3:
; GCN: v_med3_f32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff, [[MED3]]
-define amdgpu_kernel void @test_fabs_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
+define amdgpu_kernel void @test_fabs_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
%fabs.med3 = call float @llvm.fabs.f32(float %med3)
store float %fabs.med3, ptr addrspace(1) %out
@@ -55,7 +55,7 @@ define amdgpu_kernel void @test_fabs_fmed3(ptr addrspace(1) %out, float %src0, f
; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
-define amdgpu_kernel void @test_fneg_fmed3_rr_0(ptr addrspace(1) %out, float %src0, float %src1) #1 {
+define amdgpu_kernel void @test_fneg_fmed3_rr_0(ptr addrspace(1) %out, float %src0, float %src1) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
%neg.med3 = fsub float -0.0, %med3
store float %neg.med3, ptr addrspace(1) %out
@@ -67,7 +67,7 @@ define amdgpu_kernel void @test_fneg_fmed3_rr_0(ptr addrspace(1) %out, float %sr
; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
-define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(ptr addrspace(1) %out, float %src0, float %src1, float %mul.arg) #1 {
+define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(ptr addrspace(1) %out, float %src0, float %src1, float %mul.arg) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
%neg.med3 = fsub float -0.0, %med3
%mul = fmul float %neg.med3, %mul.arg
@@ -79,7 +79,7 @@ define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(ptr addrspace(1) %
; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
-define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(ptr addrspace(1) %out, float %src0) #1 {
+define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(ptr addrspace(1) %out, float %src0) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
%neg.med3 = fsub float -0.0, %med3
store float %neg.med3, ptr addrspace(1) %out
@@ -91,7 +91,7 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(ptr addrspace(1) %out, flo
; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
-define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspace(1) %out, float %src0, float %mul.arg) #1 {
+define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspace(1) %out, float %src0, float %mul.arg) nounwind {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
%neg.med3 = fsub float -0.0, %med3
%mul = fmul float %neg.med3, %mul.arg
@@ -99,8 +99,5 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspac
ret void
}
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index d318bc80e49760..032be32295eba6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -9,7 +9,7 @@
; GCN-LABEL: {{^}}test_mul_legacy_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
store float %result, ptr addrspace(1) %out, align 4
ret void
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a,
; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) nounwind {
%result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
store float %result, ptr addrspace(1) %out, align 4
ret void
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, flo
; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) nounwind {
%result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
store float %result, ptr addrspace(1) %out, align 4
ret void
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, flo
; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}|
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |s{{[0-9]+}}|
-define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
%a.fabs = call float @llvm.fabs.f32(float %a)
%b.fabs = call float @llvm.fabs.f32(float %b)
%result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
@@ -50,7 +50,7 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float
; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
store float %add, ptr addrspace(1) %out, align 4
@@ -66,7 +66,7 @@ define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
+define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind "denormal-fp-math"="preserve-sign" {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
store float %add, ptr addrspace(1) %out, align 4
@@ -80,7 +80,7 @@ define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a,
; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) #2 {
+define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) nounwind "denormal-fp-math"="preserve-sign" {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0)
%add = fadd float %mul, %c
store float %add, ptr addrspace(1) %out, align 4
@@ -93,7 +93,7 @@ define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float
; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind "denormal-fp-math"="preserve-sign" {
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
@@ -102,9 +102,5 @@ define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float
ret void
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
index 71cde1eece262b..867b5b97bad489 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.fract.f32(float) #0
-declare double @llvm.amdgcn.fract.f64(double) #0
+declare float @llvm.amdgcn.fract.f32(float) nounwind readnone
+declare double @llvm.amdgcn.fract.f64(double) nounwind readnone
; GCN-LABEL: {{^}}v_fract_f32:
; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) nounwind {
%fract = call float @llvm.amdgcn.fract.f32(float %src)
store float %fract, ptr addrspace(1) %out
ret void
@@ -14,7 +14,7 @@ define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) #1 {
; GCN-LABEL: {{^}}v_fract_f64:
; GCN: v_fract_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_fract_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @v_fract_f64(ptr addrspace(1) %out, double %src) nounwind {
%fract = call double @llvm.amdgcn.fract.f64(double %src)
store double %fract, ptr addrspace(1) %out
ret void
@@ -23,11 +23,8 @@ define amdgpu_kernel void @v_fract_f64(ptr addrspace(1) %out, double %src) #1 {
; GCN-LABEL: {{^}}v_fract_undef_f32:
; GCN-NOT: v_fract_f32
; GCN-NOT: store_dword
-define amdgpu_kernel void @v_fract_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @v_fract_undef_f32(ptr addrspace(1) %out) nounwind {
%fract = call float @llvm.amdgcn.fract.f32(float undef)
store float %fract, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
index 43f2a5ad73092e..65fdc53e4b70e5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
@@ -1,15 +1,15 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare double @llvm.fabs.f64(double) #0
-declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
-declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) nounwind readnone
; GCN-LABEL: {{^}}s_test_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %src) nounwind {
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %src)
store i32 %frexp.exp, ptr addrspace(1) %out
ret void
@@ -17,7 +17,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %sr
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) nounwind {
%fabs.src = call float @llvm.fabs.f32(float %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
store i32 %frexp.exp, ptr addrspace(1) %out
@@ -26,7 +26,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, floa
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) nounwind {
%fabs.src = call float @llvm.fabs.f32(float %src)
%fneg.fabs.src = fneg float %fabs.src
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fneg.fabs.src)
@@ -36,7 +36,7 @@ define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out,
; GCN-LABEL: {{^}}s_test_copysign_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out, float %src, float %sign) #1 {
+define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out, float %src, float %sign) nounwind {
%copysign = call float @llvm.copysign.f32(float %src, float %sign)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %copysign)
store i32 %frexp.exp, ptr addrspace(1) %out
@@ -45,7 +45,7 @@ define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out,
; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) nounwind {
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %src)
store i32 %frexp.exp, ptr addrspace(1) %out
ret void
@@ -53,7 +53,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %s
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) nounwind {
%fabs.src = call double @llvm.fabs.f64(double %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
store i32 %frexp.exp, ptr addrspace(1) %out
@@ -62,13 +62,10 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, doub
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) nounwind {
%fabs.src = call double @llvm.fabs.f64(double %src)
%fneg.fabs.src = fneg double %fabs.src
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fneg.fabs.src)
store i32 %frexp.exp, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
index a27034a852061c..fc790d88da9935 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-declare float @llvm.amdgcn.frexp.mant.f32(float) #0
-declare double @llvm.amdgcn.frexp.mant.f64(double) #0
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
+declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
; GCN-LABEL: {{^}}s_test_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %src) nounwind {
%frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %src)
store float %frexp.mant, ptr addrspace(1) %out
ret void
@@ -16,7 +16,7 @@ define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %s
; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
-define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) nounwind {
%fabs.src = call float @llvm.fabs.f32(float %src)
%frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fabs.src)
store float %frexp.mant, ptr addrspace(1) %out
@@ -25,7 +25,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(ptr addrspace(1) %out, flo
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
-define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) nounwind {
%fabs.src = call float @llvm.fabs.f32(float %src)
%fneg.fabs.src = fneg float %fabs.src
%frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fneg.fabs.src)
@@ -35,7 +35,7 @@ define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(ptr addrspace(1) %out
; GCN-LABEL: {{^}}s_test_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_test_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_frexp_mant_f64(ptr addrspace(1) %out, double %src) nounwind {
%frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %src)
store double %frexp.mant, ptr addrspace(1) %out
ret void
@@ -43,7 +43,7 @@ define amdgpu_kernel void @s_test_frexp_mant_f64(ptr addrspace(1) %out, double %
; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, |{{s\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) nounwind {
%fabs.src = call double @llvm.fabs.f64(double %src)
%frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fabs.src)
store double %frexp.mant, ptr addrspace(1) %out
@@ -52,13 +52,10 @@ define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(ptr addrspace(1) %out, dou
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, -|{{s\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) nounwind {
%fabs.src = call double @llvm.fabs.f64(double %src)
%fneg.fabs.src = fneg double %fabs.src
%frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fneg.fabs.src)
store double %frexp.mant, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
index da4950f7b612c7..f0f56f5cda2320 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
@@ -14,10 +14,10 @@
; CHECK-LABEL: {{^}}groupstaticsize_test0:
; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize at abs32@lo
; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @groupstaticsize_test0(ptr addrspace(1) %out, ptr addrspace(1) %lds_size) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @groupstaticsize_test0(ptr addrspace(1) %out, ptr addrspace(1) %lds_size) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 64
- %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
+ %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -31,9 +31,9 @@ define amdgpu_kernel void @groupstaticsize_test0(ptr addrspace(1) %out, ptr addr
; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0xc00{{$}}
define amdgpu_kernel void @groupstaticsize_test1(ptr addrspace(1) %out, i32 %cond, ptr addrspace(1) %lds_size) {
entry:
- %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
+ %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 64
%tmp = icmp eq i32 %cond, 0
br i1 %tmp, label %if, label %else
@@ -58,7 +58,7 @@ endif: ; preds = %else, %if
; CHECK-LABEL: {{^}}large_groupstaticsize:
; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize at abs32@lo
; HSA: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
-define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %idx) #0 {
+define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %idx) nounwind {
%gep = getelementptr inbounds [4096 x i32], ptr addrspace(3) @large, i32 0, i32 %idx
store volatile i32 0, ptr addrspace(3) %gep
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize()
@@ -66,8 +66,5 @@ define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %id
ret void
}
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index 309fd99031155d..6310f498b112fc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -13,10 +13,10 @@
; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
-declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
+declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
+declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) nounwind readnone convergent
+declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 %src) {
; SDAG-GFX11-LABEL: v_icmp_i32_eq:
@@ -1802,7 +1802,5 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
store i32 %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index 5f979e0177f588..793ae2ae037563 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -16,10 +16,10 @@
; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) nounwind readnone convergent
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 %src) {
; GFX11-LABEL: v_icmp_i32_eq:
@@ -2039,7 +2039,5 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32
store i64 %result, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir
index da1d9972e42dcf..31a9c62597447a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
--- |
- define amdgpu_kernel void @largeInterleave() #0 { ret void }
+ define amdgpu_kernel void @largeInterleave() "amdgpu-flat-work-group-size"="256,256" { ret void }
; GCN-LABEL: largeInterleave:
; GCN: ; %bb.0:
; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -1145,7 +1145,6 @@
; GCN-NEXT: s_waitcnt vmcnt(8)
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_endpgm
- attributes #0 = {"amdgpu-flat-work-group-size"="256,256"}
!0 = !{i64 2862105}
...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
index 0473e017f193cb..85e0a3e9ab76a3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
--- |
- define amdgpu_kernel void @smallInterleave() #0 { ret void }
+ define amdgpu_kernel void @smallInterleave() "amdgpu-flat-work-group-size"="256,256" { ret void }
; GCN-LABEL: smallInterleave:
; GCN: ; %bb.0:
; GCN-NEXT: ; implicit-def: $vgpr2
@@ -488,7 +488,6 @@
; GCN-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1]
; GCN-NEXT: v_fmac_f32_e32 v2, v4, v16
; GCN-NEXT: s_endpgm
- attributes #0 = {"amdgpu-flat-work-group-size"="256,256"}
!0 = !{i64 2862105}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index dba67a03c000e5..60c4f8f9aea120 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_kernel void @test_iglp_opt() #0 {
+define amdgpu_kernel void @test_iglp_opt() nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_iglp_opt:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: ; iglp_opt mask(0x00000000)
; GCN-NEXT: s_endpgm
entry:
- call void @llvm.amdgcn.iglp.opt(i32 0) #1
+ call void @llvm.amdgcn.iglp.opt(i32 0) convergent nounwind
ret void
}
-define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_iglp_opt_mfma_gemm:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -148,7 +148,7 @@ entry:
}
-define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -284,9 +284,6 @@ entry:
}
-declare void @llvm.amdgcn.iglp.opt(i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) #1
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.iglp.opt(i32) convergent nounwind
+declare i32 @llvm.amdgcn.workitem.id.x() convergent nounwind
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
index 31ba2f224bbae0..462fd066a41aef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
@@ -1276,7 +1276,7 @@ main_body:
ret void
}
-define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) nounwind {
; GFX9-LABEL: getresinfo_dmask0:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: ; return to shader part epilog
@@ -1294,53 +1294,49 @@ main_body:
ret <4 x float> %r
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
index 7c8a395b488f3a..4b48119d2fb080 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
@@ -1438,7 +1438,7 @@ main_body:
ret void
}
-define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) nounwind {
; GFX9-LABEL: getresinfo_dmask0:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: ; return to shader part epilog
@@ -1460,53 +1460,49 @@ main_body:
ret <4 x float> %r
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readnone
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
index f13b897971707a..7b39f1d52751bd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -282,31 +282,27 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
+declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) nounwind
+declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) nounwind
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index b4f05bce376804..0076decbddd4d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -168,22 +168,18 @@ main_body:
ret void
}
-declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+
+declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index 38c1f10f2c011f..a8dcb228b14251 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -260,7 +260,7 @@ main_body:
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
-define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
+define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) nounwind {
main_body:
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
%data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
@@ -270,7 +270,7 @@ main_body:
; GCN-LABEL: image_load_mmo
; GCN: image_load v1, v[{{[0-9:]+}}], s[0:7] dmask:0x1 unorm
-define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
+define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) nounwind {
store float 0.000000e+00, ptr addrspace(3) %lds
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
@@ -289,42 +289,38 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
+
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
index db72e6c6d0c174..591c43df0a5a51 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -4138,7 +4138,7 @@ main_body:
ret float %r
}
-define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) nounwind {
; VERDE-LABEL: getresinfo_dmask0:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: ; return to shader part epilog
@@ -4168,7 +4168,7 @@ main_body:
}
;
-define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
+define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) nounwind {
; VERDE-LABEL: image_store_wait:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
@@ -4236,7 +4236,7 @@ main_body:
ret void
}
-define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
+define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) nounwind {
; VERDE-LABEL: image_load_mmo:
; VERDE: ; %bb.0:
; VERDE-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
@@ -4319,70 +4319,66 @@ define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %
ret float %tex
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) nounwind
+declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index 3f88ab1f64912f..b8fe578a756fda 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -481,25 +481,21 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 3a5a60896ee245..5f158ad62fd3e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -22,8 +22,4 @@ main_body:
ret <2 x float> %r
}
-declare <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
index b5faae131d983a..eac7dde3aba4d1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
@@ -190,26 +190,22 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
index e7a57d51a0dc00..34ccce3b00dc90 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
@@ -100,22 +100,18 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
index fe65d6e104bb69..a701ef8e5ccebb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
@@ -36,8 +36,6 @@ main_body:
ret <4 x float> %r
}
-declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
index 5ffdbb0f8c5b07..f72ea4f15abfc0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
@@ -1225,16 +1225,14 @@ main_body:
ret [4 x float] %i7
}
-declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
+declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
+declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
-declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
+declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
+declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
-declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly willreturn
declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32)
-
-attributes #0 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
index 9a5d4855e8afc8..9d41af3ec8b313 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
@@ -150,6 +150,3 @@ main_body:
declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
index 3e5a5243a69e42..bb16f6953b2de3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
@@ -150,6 +150,3 @@ main_body:
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
index 1348315e72e7bc..1c9638db8473c2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
@@ -257,20 +257,17 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) nounwind readonly
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
index b5b5944fffda9e..d6b04af24aa09b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
@@ -122,14 +122,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+
+declare float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
index 4cd761b555d6e3..16c24245d68f59 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -151,15 +151,12 @@ define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x
ret float %r
}
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.rint.f32(float) #2
-declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone speculatable willreturn }
+declare float @llvm.rint.f32(float) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
index 42005499bded8b..ca5826b1309266 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -1619,53 +1619,49 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
index 6027d734184190..af96ea59efecbe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
@@ -176,15 +176,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
index 28a06115116625..9a698ea03ced99 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
@@ -164,15 +164,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
index 59f5a27087240d..33bf8b98c6d333 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
@@ -99,15 +99,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
index 0e8770f3da038a..68a090f216e1dc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
@@ -99,15 +99,11 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index 48491729f109ac..795ea4d6441d99 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -577,18 +577,14 @@ main_body:
ret <4 x float> %r
}
-declare half @llvm.amdgcn.image.sample.2d.f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.sample.2d.v3f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x half>,i32} @llvm.amdgcn.image.sample.2d.v2f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare half @llvm.amdgcn.image.sample.2d.f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.sample.2d.v3f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<2 x half>,i32} @llvm.amdgcn.image.sample.2d.v2f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index c8421c66f97c38..2c1eeb9bee9782 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -2713,54 +2713,50 @@ main_body:
ret <2 x float> %out
}
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
index 42fa415b5da621..95f44e7f061faa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
@@ -429,27 +429,27 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_d_1d:
@@ -868,27 +868,27 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX10-LABEL: sample_d_1d_g16_a16:
@@ -969,7 +969,3 @@ main_body:
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
index aa24ac394730ec..05ada0503ac852 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
@@ -310,19 +310,15 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
index 449c4ebec889df..b762400d860abd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
@@ -310,19 +310,15 @@ main_body:
ret <2 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
index 67e6bb7e761f5e..b3a3fe1000ed14 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
@@ -321,51 +321,47 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
index 554b961beaf7b3..15774734f11145 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
@@ -391,6 +391,3 @@ main_body:
declare void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half>, i32, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half>, i32, i16, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
index 5d4c91ad8a510f..4b699000286c22 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
@@ -379,6 +379,3 @@ main_body:
declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #2
declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
index 77f57b0322711b..87fd071ff7d1e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
@@ -1,7 +1,7 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
-define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out) nounwind {
%implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
%value = load i32, ptr addrspace(4) %implicit_buffer_ptr
store i32 %value, ptr addrspace(1) %out
@@ -9,14 +9,11 @@ define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out) #1 {
}
; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
-define void @test_func(ptr addrspace(1) %out) #1 {
+define void @test_func(ptr addrspace(1) %out) nounwind {
%implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
%value = load i32, ptr addrspace(4) %implicit_buffer_ptr
store i32 %value, ptr addrspace(1) %out
ret void
}
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
index 8598b78deccf50..d34bbd1acdd773 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
@@ -7,7 +7,7 @@
; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return
-define amdgpu_ps i32 @test_ps() #1 {
+define amdgpu_ps i32 @test_ps() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
%implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
@@ -19,7 +19,7 @@ define amdgpu_ps i32 @test_ps() #1 {
; GCN: s_mov_b64 s[4:5], s[0:1]
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], 0{{$}}
; GCN: s_load_dword s0, s[0:1], 0x0
-define amdgpu_cs i32 @test_cs() #1 {
+define amdgpu_cs i32 @test_cs() nounwind {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
%implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
@@ -27,7 +27,4 @@ define amdgpu_cs i32 @test_cs() #1 {
ret i32 %value
}
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 70eff494501532..8eee618ecd3ce4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -13,7 +13,7 @@
; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
-define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
+define amdgpu_kernel void @kernel_implicitarg_ptr_empty() nounwind noinline {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -31,7 +31,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; COV4: .amdhsa_kernarg_size 0
; COV5: .amdhsa_kernarg_size 0
-define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
+define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() nounwind noinline "amdgpu-implicitarg-num-bytes"="0" {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
; HSA: s_load_dword s0, s[4:5], 0x0
; HSA: .amdhsa_kernarg_size 48
-define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
+define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() nounwind noinline "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -62,7 +62,7 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
-define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
+define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) nounwind noinline {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; HSA: s_load_dword s0, s[4:5], 0x1c
; HSA: .amdhsa_kernarg_size 160
-define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
+define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) nounwind noinline "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -88,7 +88,7 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @func_implicitarg_ptr() #0 {
+define void @func_implicitarg_ptr() nounwind noinline {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -99,7 +99,7 @@ define void @func_implicitarg_ptr() #0 {
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @opencl_func_implicitarg_ptr() #0 {
+define void @opencl_func_implicitarg_ptr() nounwind noinline {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -116,7 +116,7 @@ define void @opencl_func_implicitarg_ptr() #0 {
; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
-define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
+define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() nounwind noinline {
call void @func_implicitarg_ptr()
ret void
}
@@ -131,7 +131,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
; GCN: s_swappc_b64
; HSA: .amdhsa_kernarg_size 0
-define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
+define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() nounwind noinline "amdgpu-implicitarg-num-bytes"="0" {
call void @func_implicitarg_ptr()
ret void
}
@@ -145,7 +145,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3
; GCN: s_swappc_b64
; HSA: .amdhsa_kernarg_size 48
-define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
+define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() nounwind noinline "amdgpu-implicitarg-num-bytes"="48" {
call void @func_implicitarg_ptr()
ret void
}
@@ -163,7 +163,7 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
-define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
+define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) nounwind noinline {
call void @func_implicitarg_ptr()
ret void
}
@@ -178,7 +178,7 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
; GCN: s_swappc_b64
; HSA: .amdhsa_kernarg_size 160
-define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
+define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) nounwind noinline "amdgpu-implicitarg-num-bytes"="48" {
call void @func_implicitarg_ptr()
ret void
}
@@ -189,7 +189,7 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
-define void @func_call_implicitarg_ptr_func() #0 {
+define void @func_call_implicitarg_ptr_func() nounwind noinline {
call void @func_implicitarg_ptr()
ret void
}
@@ -200,7 +200,7 @@ define void @func_call_implicitarg_ptr_func() #0 {
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
-define void @opencl_func_call_implicitarg_ptr_func() #0 {
+define void @opencl_func_call_implicitarg_ptr_func() nounwind noinline {
call void @func_implicitarg_ptr()
ret void
}
@@ -211,7 +211,7 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 {
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
-define void @func_kernarg_implicitarg_ptr() #0 {
+define void @func_kernarg_implicitarg_ptr() nounwind noinline {
%kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
@@ -225,7 +225,7 @@ define void @func_kernarg_implicitarg_ptr() #0 {
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
-define void @opencl_func_kernarg_implicitarg_ptr() #0 {
+define void @opencl_func_kernarg_implicitarg_ptr() nounwind noinline {
%kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
@@ -237,7 +237,7 @@ define void @opencl_func_kernarg_implicitarg_ptr() #0 {
; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0
; GCN: s_swappc_b64
-define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
+define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) nounwind noinline {
call void @func_kernarg_implicitarg_ptr()
ret void
}
@@ -247,7 +247,7 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
; MESA: kernarg_segment_alignment = 6
; HSA: .amdhsa_kernarg_size 120
-define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
+define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) nounwind noinline "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
ret void
@@ -307,13 +307,8 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; HSA-NEXT: .kernarg_segment_size: 120
; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
-
-attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
index 3a540bdf5b53dd..ef9486426374ce 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -199,7 +199,5 @@ endif:
ret float %v
}
-declare void @llvm.amdgcn.init.exec(i64) #1
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1
-
-attributes #1 = { convergent }
+declare void @llvm.amdgcn.init.exec(i64) convergent
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32) convergent
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
index 5d2e10756c372a..a22a0fe114bcf4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-32BANK %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8-16BANK %s
-define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) nounwind readnone {
; GFX9-32BANK-LABEL: interp_f16:
; GFX9-32BANK: ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
@@ -56,7 +56,7 @@ main_body:
}
; check that m0 is setup correctly before the interp p1 instruction
-define amdgpu_ps half @interp_p1_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps half @interp_p1_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) nounwind readnone {
; GFX9-32BANK-LABEL: interp_p1_m0_setup:
; GFX9-32BANK: ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT: ;;#ASMSTART
@@ -106,7 +106,7 @@ define amdgpu_ps half @interp_p1_m0_setup(float inreg %i, float inreg %j, i32 in
; GFX8-16BANK-NEXT: v_add_f16_e32 v0, s3, v0
; GFX8-16BANK-NEXT: ; return to shader part epilog
main_body:
- %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
+ %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() nounwind readnone
%p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
%p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
%my = trunc i32 %mx to i16
@@ -116,7 +116,7 @@ main_body:
}
; check that m0 is setup correctly before the interp p2 instruction
-define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) nounwind readnone {
; GFX9-32BANK-LABEL: interp_p2_m0_setup:
; GFX9-32BANK: ; %bb.0: ; %main_body
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
@@ -170,7 +170,7 @@ define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 in
; GFX8-16BANK-NEXT: ; return to shader part epilog
main_body:
%p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 1, i32 2, i1 0, i32 %m0)
- %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
+ %mx = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() nounwind readnone
%p2_0 = call half @llvm.amdgcn.interp.p2.f16(float %p1_0, float %j, i32 1, i32 2, i1 0, i32 %m0)
%my = trunc i32 %mx to i16
%mh = bitcast i16 %my to half
@@ -179,9 +179,7 @@ main_body:
}
; float @llvm.amdgcn.interp.p1.f16(i, attrchan, attr, high, m0)
-declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
+declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) nounwind readnone
; half @llvm.amdgcn.interp.p1.f16(p1, j, attrchan, attr, high, m0)
-declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
index 429528e9091d13..61f89b505fe546 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -27,11 +27,11 @@ main_body:
%p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
%p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
%p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32_many:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -69,11 +69,11 @@ main_body:
%p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
%p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
%p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f32_many_vm:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
@@ -115,11 +115,11 @@ main_body:
%p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
%p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
%p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) nounwind
ret void
}
-define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
+define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) nounwind {
; GCN-LABEL: v_interp_f16:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 s3, exec_lo
@@ -147,7 +147,7 @@ main_body:
ret half %res
}
-define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
+define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) nounwind {
; GCN-LABEL: v_interp_f16_imm_params:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
@@ -167,13 +167,10 @@ main_body:
ret half %res
}
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
-declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) nounwind
+declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) nounwind
+declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) nounwind
+declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
index ce5698c4a1386c..25bfdaaf48900e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -10,7 +10,7 @@
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p0, attr0.x{{$}}
-define amdgpu_ps void @v_interp(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
+define amdgpu_ps void @v_interp(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) nounwind {
main_body:
%i = extractelement <2 x float> %arg4, i32 0
%j = extractelement <2 x float> %arg4, i32 1
@@ -20,7 +20,7 @@ main_body:
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %arg3)
%const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %arg3)
%w = fadd float %p1_1, %const
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_0, float %p1_1, float %w, i1 true, i1 true) nounwind
ret void
}
@@ -39,7 +39,7 @@ main_body:
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
-define amdgpu_ps void @v_interp_p1(float %i) #0 {
+define amdgpu_ps void @v_interp_p1(float %i) nounwind {
bb:
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
@@ -80,7 +80,7 @@ bb:
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
-define amdgpu_ps void @v_interp_p2(float %x, float %j) #0 {
+define amdgpu_ps void @v_interp_p2(float %x, float %j) nounwind {
bb:
%p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
%p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
@@ -122,7 +122,7 @@ bb:
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
-define amdgpu_ps void @v_interp_mov(float %x, float %j) #0 {
+define amdgpu_ps void @v_interp_mov(float %x, float %j) nounwind {
bb:
%mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
%mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
@@ -170,13 +170,13 @@ bb:
; TODO-VI-DAG: v_interp_mov_f32_e32 v{{[0-9]+}}, p0, attr0.x{{$}}
; TODO-VI: s_mov_b32 m0, -1{{$}}
; TODO-VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-;define amdgpu_ps void @v_interp_readnone(ptr addrspace(3) %lds) #0 {
+;define amdgpu_ps void @v_interp_readnone(ptr addrspace(3) %lds) nounwind {
;bb:
; store float 0.000000e+00, ptr addrspace(3) %lds
; %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
; %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
; store float 0.000000e+00, ptr addrspace(3) %tmp2
-; call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+; call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
; ret void
;}
@@ -185,42 +185,39 @@ bb:
; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32{{(_e32)*}} [[DST:v[0-9]+]], [[DST]]
-define amdgpu_ps void @v_interp_p1_bank16_bug(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg13, ptr addrspace(4) inreg %arg14, ptr addrspace(4) inreg %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
+define amdgpu_ps void @v_interp_p1_bank16_bug(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg13, ptr addrspace(4) inreg %arg14, ptr addrspace(4) inreg %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) nounwind {
main_body:
%i.i = extractelement <2 x i32> %arg19, i32 0
%j.i = extractelement <2 x i32> %arg19, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) #0
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) #0
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg17) nounwind
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg17) nounwind
%i.i7 = extractelement <2 x i32> %arg19, i32 0
%j.i8 = extractelement <2 x i32> %arg19, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) #0
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) #0
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 0, i32 %arg17) nounwind
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 0, i32 %arg17) nounwind
%i.i1 = extractelement <2 x i32> %arg19, i32 0
%j.i2 = extractelement <2 x i32> %arg19, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) #0
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) #0
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 0, i32 %arg17) nounwind
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 0, i32 %arg17) nounwind
%tmp = call float @llvm.fabs.f32(float %p2.i)
%tmp34 = call float @llvm.fabs.f32(float %p2.i12)
%tmp35 = call float @llvm.fabs.f32(float %p2.i6)
%tmp36 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp, float %tmp34)
%tmp38 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp35, float 1.000000e+00)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp36, <2 x half> %tmp38, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp36, <2 x half> %tmp38, i1 true, i1 true) nounwind
ret void
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
index bc10eb68d75cbb..a3bcf440e065d8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
@@ -49,10 +49,8 @@ bb1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i1 @llvm.amdgcn.is.private(ptr nocapture) nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index aad4d924952fff..08974aabc15c54 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -48,10 +48,8 @@ bb1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare i1 @llvm.amdgcn.is.shared(ptr nocapture) nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 8dba22312ac88c..dfb00dff7736cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -10,7 +10,7 @@
; HSA: .amdhsa_kernarg_size 8
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test(ptr addrspace(1) %out) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -25,7 +25,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 {
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
; HSA: .amdhsa_kernarg_size 8
-define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -45,7 +45,7 @@ define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 12
-define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 {
+define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) nounwind "amdgpu-implicitarg-num-bytes"="0" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
store i32 %val, ptr addrspace(1) %out
@@ -63,7 +63,7 @@ define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
; HSA: .amdhsa_kernarg_size 64
-define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 {
+define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) nounwind "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load i32, ptr addrspace(4) %implicitarg.ptr
store i32 %val, ptr addrspace(1) %out
@@ -78,7 +78,7 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out,
; HSA: .amdhsa_kernarg_size 0
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 0
-define amdgpu_kernel void @test_no_kernargs() #1 {
+define amdgpu_kernel void @test_no_kernargs() nounwind "amdgpu-implicitarg-num-bytes"="0" {
%kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
%value = load i32, ptr addrspace(4) %gep
@@ -90,7 +90,7 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 48
-define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() nounwind "amdgpu-implicitarg-num-bytes"="48" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
store volatile i32 %val, ptr addrspace(1) null
@@ -101,7 +101,7 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
; OS-MESA3D: kernarg_segment_byte_size = 16
; OS-MESA3D: kernarg_segment_alignment = 4
; HSA: .amdhsa_kernarg_size 40
-define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() nounwind "amdgpu-implicitarg-num-bytes"="38" {
%implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
store volatile i32 %val, ptr addrspace(1) null
@@ -116,13 +116,8 @@ define ptr addrspace(4) @func_kernarg_segment_ptr() {
ret ptr addrspace(4) %ptr
}
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
-attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 8f2bdff0163452..92693194cf3976 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -29,7 +29,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
%c1 = fcmp oge float %arg14, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
%tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
}
@@ -225,7 +225,7 @@ define amdgpu_gs void @neg_olt(float %a) {
; GFX10: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
; GCN: v_cndmask_b32
; GCN: v_cmp_nle_f32
-define amdgpu_ps void @fcmp_x2(float %a) #0 {
+define amdgpu_ps void @fcmp_x2(float %a) nounwind {
%ogt = fcmp nsz ogt float %a, 2.500000e-01
%k = select i1 %ogt, float -1.000000e+00, float 0.000000e+00
%c = fcmp nsz oge float %k, 0.000000e+00
@@ -251,7 +251,7 @@ define amdgpu_ps float @wqm(float %a) {
; This checks that we use the 64-bit encoding when the operand is a SGPR.
; GCN-LABEL: {{^}}test_sgpr:
; GCN: v_cmp_nle_f32_e64
-define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
+define amdgpu_ps void @test_sgpr(float inreg %a) nounwind {
%c = fcmp ole float %a, 1.000000e+00
call void @llvm.amdgcn.kill(i1 %c) #1
ret void
@@ -259,7 +259,7 @@ define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
; GCN-LABEL: {{^}}test_non_inline_imm_sgpr:
; GCN-NOT: v_cmp_le_f32_e64
-define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
+define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) nounwind {
%c = fcmp ole float %a, 1.500000e+00
call void @llvm.amdgcn.kill(i1 %c) #1
ret void
@@ -270,7 +270,7 @@ define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
; GCN: s_and_b64 exec
; GCN: s_cmp
; GCN: s_cbranch_scc
-define amdgpu_ps void @test_scc_liveness() #0 {
+define amdgpu_ps void @test_scc_liveness() nounwind {
main_body:
br label %loop3
@@ -318,9 +318,7 @@ bb35: ; preds = %bb33, %.entry
ret void
}
-declare void @llvm.amdgcn.kill(i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.kill(i1) nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) nounwind
declare i1 @llvm.amdgcn.wqm.vote(i1)
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 6d1ca3fddce5e2..97d20c3f85e98b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -27,7 +27,7 @@
; GCN: buffer_store_b32
; GCN: buffer_store_b32
define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
- i32 inreg %arg1, i32 inreg %arg2) #0 {
+ i32 inreg %arg1, i32 inreg %arg2) nounwind {
main_body:
%p0 = call float @llvm.amdgcn.lds.direct.load(i32 %arg0)
; Ensure memory clustering is occuring for lds_direct_load
@@ -45,8 +45,5 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.lds.direct.load(i32) #1
+declare float @llvm.amdgcn.lds.direct.load(i32) nounwind readonly
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 924d9eb7e6c29d..a66dfb7581dd5d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -31,7 +31,7 @@
; GFX12-DAG: s_wait_expcnt 0x0
; GCN: buffer_store_b32
; GCN: buffer_store_b32
-define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) #0 {
+define amdgpu_ps void @lds_param_load(ptr addrspace(8) inreg %buf, i32 inreg %arg) nounwind {
main_body:
%p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg)
; Ensure memory clustering is occuring for lds_param_load
@@ -49,8 +49,5 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
+declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) nounwind readnone
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
index 9a2715b2ebc202..b8c9f79a9fab5b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
@@ -1,14 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.lerp(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_lerp:
; GCN: v_lerp_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_lerp(ptr addrspace(1) %out, i32 %src) nounwind {
- %result= call i32 @llvm.amdgcn.lerp(i32 %src, i32 100, i32 100) #0
+ %result= call i32 @llvm.amdgcn.lerp(i32 %src, i32 100, i32 100) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
index b0a2d10eebafe4..af76bf7b2398d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
@@ -3,15 +3,12 @@
; ERR: intrinsic not supported on subtarget
-declare float @llvm.amdgcn.log.clamp.f32(float) #0
+declare float @llvm.amdgcn.log.clamp.f32(float) nounwind readnone
; GCN-LABEL: {{^}}v_log_clamp_f32:
; GCN: v_log_clamp_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @v_log_clamp_f32(ptr addrspace(1) %out, float %src) #1 {
- %log.clamp = call float @llvm.amdgcn.log.clamp.f32(float %src) #0
+define amdgpu_kernel void @v_log_clamp_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %log.clamp = call float @llvm.amdgcn.log.clamp.f32(float %src) nounwind readnone
store float %log.clamp, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
index ebdda381a0762c..1c0b9e0a7501db 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
@@ -68,12 +68,10 @@ define half @v_fneg_fabs_log_f16(half %src) {
ret half %log
}
-declare half @llvm.amdgcn.log.f16(half) #0
-declare float @llvm.amdgcn.log.f32(float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.amdgcn.log.f16(half) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.log.f32(float) nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable willreturn
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GISEL: {{.*}}
; SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
index 4ac9dc8565ff40..5f797e3c923995 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -7,17 +7,17 @@
; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]]
define amdgpu_ps void @mbcnt_intrinsics(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3) {
main_body:
- %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
- %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) nounwind readnone
%tmp = bitcast i32 %hi to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) #1
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp, float %tmp, float %tmp, float %tmp, i1 true, i1 true) nounwind
ret void
}
; GCN-LABEL: {{^}}mbcnt_lo_known_bits_1:
; GCN: v_mbcnt_lo_u32_b32
; GCN: v_and_b32_e32
-define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 {
+define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) nounwind readnone {
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 %y)
%mask = and i32 %lo, 63
ret i32 %mask
@@ -26,7 +26,7 @@ define i32 @mbcnt_lo_known_bits_1(i32 %x, i32 %y) #0 {
; GCN-LABEL: {{^}}mbcnt_lo_known_bits_2:
; GCN: v_mbcnt_lo_u32_b32
; GCN-NOT: and
-define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 {
+define i32 @mbcnt_lo_known_bits_2(i32 %x) nounwind readnone {
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 0)
%mask = and i32 %lo, 63
ret i32 %mask
@@ -35,7 +35,7 @@ define i32 @mbcnt_lo_known_bits_2(i32 %x) #0 {
; GCN-LABEL: {{^}}mbcnt_lo_known_bits_3:
; GCN: v_mbcnt_lo_u32_b32
; GCN-NOT: and
-define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 {
+define i32 @mbcnt_lo_known_bits_3(i32 %x) nounwind readnone {
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
%mask = and i32 %lo, 127
ret i32 %mask
@@ -44,7 +44,7 @@ define i32 @mbcnt_lo_known_bits_3(i32 %x) #0 {
; GCN-LABEL: {{^}}mbcnt_lo_known_bits_4:
; GCN: v_mbcnt_lo_u32_b32
; GCN: v_and_b32_e32
-define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 {
+define i32 @mbcnt_lo_known_bits_4(i32 %x) nounwind readnone {
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 %x, i32 15)
%mask = and i32 %lo, 63
ret i32 %mask
@@ -54,7 +54,7 @@ define i32 @mbcnt_lo_known_bits_4(i32 %x) #0 {
; GCN-LABEL: {{^}}mbcnt_hi_known_bits_1:
; GCN: v_mbcnt_hi_u32_b32
; GCN: v_and_b32_e32
-define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 {
+define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) nounwind readnone {
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 %y)
%mask = and i32 %hi, 63
ret i32 %mask
@@ -63,7 +63,7 @@ define i32 @mbcnt_hi_known_bits_1(i32 %x, i32 %y) #0 {
; GCN-LABEL: {{^}}mbcnt_hi_known_bits_2:
; GCN: v_mbcnt_hi_u32_b32
; GCN-NOT: and
-define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 {
+define i32 @mbcnt_hi_known_bits_2(i32 %x) nounwind readnone {
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 0)
%mask = and i32 %hi, 63
ret i32 %mask
@@ -72,7 +72,7 @@ define i32 @mbcnt_hi_known_bits_2(i32 %x) #0 {
; GCN-LABEL: {{^}}mbcnt_hi_known_bits_3:
; GCN: v_mbcnt_hi_u32_b32
; GCN-NOT: and
-define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 {
+define i32 @mbcnt_hi_known_bits_3(i32 %x) nounwind readnone {
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
%mask = and i32 %hi, 127
ret i32 %mask
@@ -81,15 +81,12 @@ define i32 @mbcnt_hi_known_bits_3(i32 %x) #0 {
; GCN-LABEL: {{^}}mbcnt_hi_known_bits_4:
; GCN: v_mbcnt_hi_u32_b32
; GCN: v_and_b32_e32
-define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 {
+define i32 @mbcnt_hi_known_bits_4(i32 %x) nounwind readnone {
%hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 %x, i32 15)
%mask = and i32 %hi, 63
ret i32 %mask
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
index 6b6fb30da37869..d2ee4623c72976 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
@@ -52,7 +52,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%a = bitcast i32 1 to <2 x i16>
@@ -73,7 +73,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_16x16x2bf16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x2bf16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%a = bitcast i32 1 to <2 x i16>
@@ -94,7 +94,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_4x4x2bf16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x2bf16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%a = bitcast i32 1 to <2 x i16>
@@ -115,7 +115,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x4bf16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x4bf16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%a = bitcast i32 1 to <2 x i16>
@@ -136,7 +136,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_16x16x8bf16(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x8bf16(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%a = bitcast i32 1 to <2 x i16>
@@ -145,5 +145,3 @@ bb:
store <4 x float> %mai.1, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 8a460154e4789e..91d463f4920e8f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -20,7 +20,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%a = bitcast i64 1 to <4 x i16>
@@ -39,7 +39,7 @@ bb:
; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%a = bitcast i64 1 to <4 x i16>
@@ -58,7 +58,7 @@ bb:
; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%a = bitcast i64 1 to <4 x i16>
@@ -77,7 +77,7 @@ bb:
; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%a = bitcast i64 1 to <4 x i16>
@@ -96,7 +96,7 @@ bb:
; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%a = bitcast i64 1 to <4 x i16>
@@ -112,7 +112,7 @@ bb:
; GFX940: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
; GFX940: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx2
-define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double 0.0, i32 0, i32 0, i32 0)
%mai.2 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %mai.1, i32 1, i32 2, i32 3)
@@ -126,7 +126,7 @@ bb:
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x double>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %in.1, i32 1, i32 2, i32 3)
@@ -141,7 +141,7 @@ bb:
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, i32 0, i32 0, i32 0)
%mai.2 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %mai.1, i32 1, i32 2, i32 3)
@@ -154,7 +154,7 @@ bb:
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 0.0, double 0.0, double 0.0, double 1.0>, i32 0, i32 0, i32 0)
store <4 x double> %mai.1, ptr addrspace(1) %arg
@@ -168,11 +168,9 @@ bb:
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 {
+define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)
store <4 x double> %mai.1, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
index 702e513aff4c7b..db3d35ee5fadba 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
@@ -40,7 +40,7 @@ declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32>, <4 x i3
; GISEL: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_i32_16x16x32i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_16x16x32i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 4294967298, i64 12884901892, <4 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -58,7 +58,7 @@ bb:
; GISEL: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_i32_32x32x16i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_32x32x16i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 4294967298, i64 12884901892, <16 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -76,7 +76,7 @@ bb:
; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -94,7 +94,7 @@ bb:
; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -112,7 +112,7 @@ bb:
; GISEL: v_mfma_f32_16x16x32_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_bf8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_bf8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -130,7 +130,7 @@ bb:
; GISEL: v_mfma_f32_16x16x32_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_fp8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_fp8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -148,7 +148,7 @@ bb:
; GISEL: v_mfma_f32_16x16x32_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_bf8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_bf8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -166,7 +166,7 @@ bb:
; GISEL: v_mfma_f32_16x16x32_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_fp8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_fp8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -184,7 +184,7 @@ bb:
; GISEL: v_mfma_f32_32x32x16_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_bf8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_bf8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -202,7 +202,7 @@ bb:
; GISEL: v_mfma_f32_32x32x16_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_fp8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_fp8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -220,7 +220,7 @@ bb:
; GISEL: v_mfma_f32_32x32x16_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_bf8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_bf8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -238,7 +238,7 @@ bb:
; GISEL: v_mfma_f32_32x32x16_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
; GCN-NOT: v_accvgpr_read_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_fp8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_fp8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -254,7 +254,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x32_f16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -273,7 +273,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -289,7 +289,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x32_bf16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -308,7 +308,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -324,7 +324,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_i32_16x16x64_i8 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_i32_16x16x64_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_16x16x64_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32> %a, <4 x i32> %b, <4 x i32> %in.1, i32 %idx, i32 1, i32 2)
@@ -343,7 +343,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_i32_32x32x32_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_32x32x32_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32> %a, <4 x i32> %b, <16 x i32> %in.1, i32 %idx, i32 1, i32 2)
@@ -359,7 +359,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x64_bf8_bf8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -375,7 +375,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x64_bf8_fp8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -391,7 +391,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x64_fp8_bf8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -407,7 +407,7 @@ bb:
; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}}
; GCN: v_smfmac_f32_16x16x64_fp8_fp8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]]
-define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -426,7 +426,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -445,7 +445,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -464,7 +464,7 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
@@ -483,12 +483,10 @@ bb:
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32
; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48
-define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 {
+define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
store <16 x float> %mai.1, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
index d5ccc288f6a004..5db610dca86b04 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
@@ -16,7 +16,7 @@ declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_i32_32x32x8i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_32x32x8i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 1, i32 2, <16 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -35,12 +35,10 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]]
-define amdgpu_kernel void @test_mfma_i32_16x16x16i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_16x16x16i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 1, i32 2, <4 x i32> %in.1, i32 1, i32 2, i32 3)
store <4 x i32> %mai.1, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 54023770ed0fff..7080ce741fc442 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -68,7 +68,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GFX908-COUNT-2: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
@@ -88,7 +88,7 @@ bb:
; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -108,7 +108,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]]
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -128,7 +128,7 @@ bb:
; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x2f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x2f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float 1.0, float 2.0, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -148,7 +148,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_16x16x4f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x4f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float 1.0, float 2.0, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -167,7 +167,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%c.1 = load <4 x half>, ptr addrspace(1) %c
@@ -188,7 +188,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_16x16x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%c.1 = load <4 x half>, ptr addrspace(1) %c
@@ -210,7 +210,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_4x4x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%c.1 = load <4 x half>, ptr addrspace(1) %c
@@ -233,7 +233,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x8f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x8f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%c.1 = load <4 x half>, ptr addrspace(1) %c
@@ -255,7 +255,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_16x16x16f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x16f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%c.1 = load <4 x half>, ptr addrspace(1) %c
@@ -310,7 +310,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_i32_32x32x4i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_32x32x4i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 1, i32 2, <32 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -330,7 +330,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_i32_16x16x4i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_16x16x4i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 1, i32 2, <16 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -350,7 +350,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_i32_4x4x4i8(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_i32_4x4x4i8(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x i32>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %in.1, i32 1, i32 2, i32 3)
@@ -363,7 +363,7 @@ bb:
; GFX908_A-NEXT: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
; GFX940: v_mfma_f32_32x32x1_2b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}]
; GFX940-NEXT: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -377,7 +377,7 @@ bb:
; GFX908_A-NEXT: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
; GFX940: v_mfma_f32_16x16x1_4b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}]
; GFX940-NEXT: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
-define amdgpu_kernel void @test_mfma_f32_16x16x1f32_forward_acc(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32_forward_acc(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 0, i32 0, i32 0)
@@ -392,7 +392,7 @@ bb:
; GFX940: v_mfma_f32_4x4x1_16b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}]
; GFX940-NEXT: s_nop 1
; GFX940-NEXT: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 0, i32 0, i32 0)
@@ -416,7 +416,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, i32 0, i32 0, i32 0)
store <4 x float> %mai.1, ptr addrspace(1) %arg
@@ -435,7 +435,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, i32 0, i32 0, i32 0)
store <16 x float> %mai.1, ptr addrspace(1) %arg
@@ -454,7 +454,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, <4 x half> <half 2.0, half 2.0, half 2.0, half 2.0>, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, i32 0, i32 0, i32 0)
store <16 x float> %mai.1, ptr addrspace(1) %arg
@@ -473,7 +473,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
store <32 x float> %mai.1, ptr addrspace(1) %arg
@@ -493,7 +493,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]],
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> <float 1.0, float 2.0, float 1.0, float 1.0>, i32 0, i32 0, i32 0)
store <4 x float> %mai.1, ptr addrspace(1) %arg
@@ -511,7 +511,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 2.0>, i32 0, i32 0, i32 0)
store <16 x float> %mai.1, ptr addrspace(1) %arg
@@ -587,7 +587,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}],
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 1.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
store <32 x float> %mai.1, ptr addrspace(1) %arg
@@ -609,7 +609,7 @@ bb:
; GFX908: global_store_dwordx4
; GFX90A-NOT: v_accvgpr_read_b32
; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]]
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat(ptr addrspace(1) %arg, i64 %idx) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat(ptr addrspace(1) %arg, i64 %idx) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -632,7 +632,7 @@ bb:
; GFX908-COUNT-4: v_accvgpr_read_b32
; GFX908: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}]
; GFX90A_40: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat_bad_code(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat_bad_code(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -657,7 +657,7 @@ bb:
; GFX908-COUNT-8: global_store_dwordx4
; GFX90A_40-NOT: v_accvgpr_read_b32
; GFX90A_40-COUNT-5: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid
@@ -666,5 +666,3 @@ bb:
store <32 x float> %mai.1, ptr addrspace(1) %gep
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 96975bd93558e4..a5e1ba0c683866 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -12,7 +12,7 @@
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) {
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -27,8 +27,8 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) {
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1
define amdgpu_kernel void @dpp_wait_states(ptr addrspace(1) %out, i32 %in) {
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
- %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
+ %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
store i32 %tmp1, ptr addrspace(1) %out
ret void
}
@@ -60,9 +60,9 @@ else:
endif:
%val = phi float [%if_val, %if], [%else_val, %else]
%val_i32 = bitcast float %val to i32
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %val_i32, i32 1, i32 1, i32 1, i1 1) #0
- %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0
- %tmp2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp1, i32 1, i32 1, i32 1, i1 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %val_i32, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
+ %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
+ %tmp2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp1, i32 1, i32 1, i32 1, i1 1) nounwind readnone convergent
%tmp_float = bitcast i32 %tmp2 to float
store float %tmp_float, ptr addrspace(1) %out
ret void
@@ -72,7 +72,7 @@ endif:
; VI: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; VI: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) {
- %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) #0
+ %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) nounwind readnone convergent
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -86,13 +86,11 @@ define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) {
; VI-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[OLD_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; VI-NOOPT-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
define amdgpu_kernel void @mov_dpp64_imm_test(ptr addrspace(1) %out) {
- %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 123451234512345, i32 1, i32 1, i32 1, i1 0) #0
+ %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 123451234512345, i32 1, i32 1, i32 1, i1 0) nounwind readnone convergent
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) #0
-
-attributes #0 = { nounwind readnone convergent }
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) nounwind readnone convergent
+declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) nounwind readnone convergent
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
index 8bff17b7299270..80f957cb946b01 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
@@ -7,7 +7,7 @@
; GFX10PLUS: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX10PLUS: v_mov_b32_dpp [[SRC]], [[SRC]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
define amdgpu_kernel void @dpp8_test(ptr addrspace(1) %out, i32 %in) {
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) nounwind readnone convergent
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -18,12 +18,10 @@ define amdgpu_kernel void @dpp8_test(ptr addrspace(1) %out, i32 %in) {
; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[5,0,0,0,0,0,0,0]{{$}}
define amdgpu_kernel void @dpp8_wait_states(ptr addrspace(1) %out, i32 %in) {
- %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
- %tmp1 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %tmp0, i32 5) #0
+ %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) nounwind readnone convergent
+ %tmp1 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %tmp0, i32 5) nounwind readnone convergent
store i32 %tmp1, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
-
-attributes #0 = { nounwind readnone convergent }
+declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) nounwind readnone convergent
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 3a5519a90e191b..20664fd57ebbbe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -1,16 +1,16 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
+declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) nounwind readnone
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-DAG: v_mov_b32_e32 v5, v1
; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_mqsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
- %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
- %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) nounwind readnone
+ %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) nounwind readnone
store i64 %tmp2, ptr addrspace(1) %out, align 4
ret void
}
@@ -20,13 +20,11 @@ define amdgpu_kernel void @v_mqsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
; GCN-DAG: v_mov_b32_e32 v3, v1
; GCN-DAG: v_mov_b32_e32 v2, v0
define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, i64 %b) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
- %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
- %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) nounwind readnone
+ %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) nounwind readnone
+ %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) nounwind readnone
store i64 %tmp4, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
index 9e6a161ab7abab..93128d2f6b65f2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -1,17 +1,17 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
+declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) nounwind readnone
; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(ptr addrspace(1) %out, i64 %src, i32 %a) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
- %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) nounwind readnone
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) nounwind readnone
store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4
ret void
}
@@ -21,10 +21,10 @@ define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(ptr addrspace
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, <4 x i32> %b) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0
- %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) nounwind readnone
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) nounwind readnone
store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4
ret void
}
@@ -34,10 +34,10 @@ define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(ptr addrspace(1) %out, i
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(ptr addrspace(1) %out, i64 %src, i32 %a) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
- %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) nounwind readnone
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) nounwind readnone
store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4
ret void
}
@@ -48,12 +48,10 @@ define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(ptr addrspace(1) %
; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(ptr addrspace(1) %out, i64 %src, i32 %a, ptr addrspace(1) %input) {
%in = load <4 x i32>, ptr addrspace(1) %input
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0
- %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) nounwind readnone
+ %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) nounwind readnone
store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
index 63d71a1e34949e..05f54a5960ca5a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_msad_u8:
; GCN: v_msad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_msad_u8(ptr addrspace(1) %out, i32 %src) {
- %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 100, i32 100) #0
+ %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 100, i32 100) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,9 +14,7 @@ define amdgpu_kernel void @v_msad_u8(ptr addrspace(1) %out, i32 %src) {
; GCN-LABEL: {{^}}v_msad_u8_non_immediate:
; GCN: v_msad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_msad_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
- %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 %a, i32 %b) #0
+ %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 %a, i32 %b) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
index 9944352f07dd90..5e80b5fc46432c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
@@ -2,13 +2,10 @@
; GCN-LABEL: {{^}}test_mul_i24:
; GCN: v_mul_i32_i24
-define amdgpu_kernel void @test_mul_i24(ptr addrspace(1) %out, i32 %src1, i32 %src2) #1 {
- %val = call i32 @llvm.amdgcn.mul.i24(i32 %src1, i32 %src2) #0
+define amdgpu_kernel void @test_mul_i24(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
+ %val = call i32 @llvm.amdgcn.mul.i24(i32 %src1, i32 %src2) nounwind readnone speculatable
store i32 %val, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.mul.i24(i32, i32) #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.mul.i24(i32, i32) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
index 6768475b1460ba..ad6b613e18512c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
@@ -2,13 +2,10 @@
; GCN-LABEL: {{^}}test_mul_u24:
; GCN: v_mul_u32_u24
-define amdgpu_kernel void @test_mul_u24(ptr addrspace(1) %out, i32 %src1, i32 %src2) #1 {
- %val = call i32 @llvm.amdgcn.mul.u24(i32 %src1, i32 %src2) #0
+define amdgpu_kernel void @test_mul_u24(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
+ %val = call i32 @llvm.amdgcn.mul.u24(i32 %src1, i32 %src2) nounwind readnone speculatable
store i32 %val, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.mul.u24(i32, i32) #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.mul.u24(i32, i32) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
index f5f51f61eafe12..020e6e3cd106ad 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -1,47 +1,44 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.perm(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
-define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, ptr addrspace(1) %out) #1 {
- %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, ptr addrspace(1) %out) nounwind {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
-define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, ptr addrspace(1) %out) #1 {
- %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, ptr addrspace(1) %out) nounwind {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
-define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, ptr addrspace(1) %out) #1 {
- %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, ptr addrspace(1) %out) nounwind {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, ptr addrspace(1) %out) #1 {
- %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, ptr addrspace(1) %out) nounwind {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
-define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, ptr addrspace(1) %out) #1 {
- %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
+define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, ptr addrspace(1) %out) nounwind {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) nounwind readnone
store i32 %val, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
index 955d8ae5cc054c..cd9ebb595f64df 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -10,7 +10,7 @@
; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However,
; the expectation is that the intrinsic will be used in non-trivial shaders,
; so such an optimization doesn't seem worth the effort.
-define amdgpu_ps float @test1() #0 {
+define amdgpu_ps float @test1() nounwind {
%live = call i1 @llvm.amdgcn.ps.live()
%live.32 = zext i1 %live to i32
%r = bitcast i32 %live.32 to float
@@ -24,7 +24,7 @@ define amdgpu_ps float @test1() #0 {
; CHECK-DAG: s_wqm_b64 exec, exec
; CHECK-DAG: v_cndmask_b32_e64 [[VAR:v[0-9]+]], 0, 1, [[COPY]]
; CHECK: image_sample v0, [[VAR]],
-define amdgpu_ps float @test2() #0 {
+define amdgpu_ps float @test2() nounwind {
%live = call i1 @llvm.amdgcn.ps.live()
%live.32 = zext i1 %live to i32
%live.32.bc = bitcast i32 %live.32 to float
@@ -39,7 +39,7 @@ define amdgpu_ps float @test2() #0 {
; CHECK-DAG: s_xor_b64 [[HELPER:s\[[0-9]+:[0-9]+\]]], [[LIVE]], -1
; CHECK-DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]]
; CHECK: ; %dead
-define amdgpu_ps float @test3(i32 %in) #0 {
+define amdgpu_ps float @test3(i32 %in) nounwind {
entry:
%live = call i1 @llvm.amdgcn.ps.live()
br i1 %live, label %end, label %dead
@@ -51,14 +51,10 @@ dead:
end:
%tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
%tc.bc = bitcast i32 %tc to float
- %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
+ %t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) nounwind
%r = extractelement <4 x float> %t, i32 0
ret float %r
}
-declare i1 @llvm.amdgcn.ps.live() #1
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare i1 @llvm.amdgcn.ps.live() nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index f81be1a0308d08..4ac08f420cbd5d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -1,16 +1,16 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
+declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) nounwind readnone
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-DAG: v_mov_b32_e32 v5, v1
; GCN-DAG: v_mov_b32_e32 v4, v0
define amdgpu_kernel void @v_qsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
- %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
- %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) nounwind readnone
+ %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) nounwind readnone
store i64 %tmp2, ptr addrspace(1) %out, align 4
ret void
}
@@ -20,13 +20,11 @@ define amdgpu_kernel void @v_qsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
; GCN-DAG: v_mov_b32_e32 v3, v1
; GCN-DAG: v_mov_b32_e32 v2, v0
define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, i64 %b) {
- %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
- %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
- %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
- %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
- %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+ %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) nounwind readnone
+ %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) nounwind readnone
+ %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) nounwind readnone
+ %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) nounwind readnone
+ %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) nounwind readnone
store i64 %tmp4, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
index 36d23197887136..8702a33fd43010 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
@@ -7,15 +7,13 @@
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
- %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+ %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone
%value = load i32, ptr addrspace(4) %queue_ptr
store i32 %value, ptr addrspace(1) %out
ret void
}
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-
-attributes #0 = { nounwind readnone }
+declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index 5c917c97e261f8..b852fa25ecbd7a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -66,7 +66,5 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
ret void
}
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
index 4d80e4ce5af14d..e89e0857cafc9b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
@@ -129,20 +129,18 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) nounwind
+declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
index 60c6268e448cb2..400260e232534e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
@@ -80,8 +80,6 @@ main_body:
ret <2 x float> %data
}
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index 0c6bba2426947e..4ca0c3fefc4a93 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -1379,18 +1379,17 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #0
-declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.raw.buffer.load.v2i16(<4 x i32>, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.raw.buffer.load.v4i16(<4 x i32>, i32, i32, i32) #0
-declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #0
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind readonly
+declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) nounwind readonly
+declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <2 x i16> @llvm.amdgcn.raw.buffer.load.v2i16(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x i16> @llvm.amdgcn.raw.buffer.load.v4i16(<4 x i32>, i32, i32, i32) nounwind readonly
+declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
index 2fe162cb56e6fc..d3e1d7d0d580c0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
@@ -67,10 +67,7 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
index 75da1adc3123cc..6dbb18b140c0dd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
@@ -113,7 +113,7 @@ main_body:
ret void
}
-define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %offset) nounwind {
; GFX68-LABEL: buffer_store_x2:
; GFX68: ; %bb.0: ; %main_body
; GFX68-NEXT: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
@@ -418,7 +418,7 @@ main_body:
ret void
}
-define amdgpu_ps void @buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_v4f16(<4 x i32> inreg %rsrc, <4 x half> %data, i32 %offset) nounwind {
; VERDE-LABEL: buffer_store_v4f16:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_cvt_f16_f32_e32 v3, v3
@@ -491,7 +491,7 @@ main_body:
ret void
}
-define amdgpu_ps void @buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16> %data, i32 %offset) nounwind {
; VERDE-LABEL: buffer_store_v4i16:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
@@ -575,20 +575,17 @@ define amdgpu_ps void @raw_buffer_store_x1_offset_swizzled_not_merged(<4 x i32>
ret void
}
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
-declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
index ccb3c4fa19e82e..8870841a2ea6f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
@@ -66,7 +66,5 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_
ret void
}
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
index 9c9cf36baecf10..4abf6ff12d4d85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
@@ -149,20 +149,18 @@ main_body:
ret void
}
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #0
-declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
index 81aee1e9c44884..2a0fd9d05ea1db 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
@@ -80,8 +80,6 @@ main_body:
ret <2 x float> %data
}
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #0
-
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
index 04d221325a5912..dac5b60b3dbd90 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
@@ -1126,18 +1126,17 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0
-declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0
-declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind readonly
+declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
index d2c9b4b73f72e1..30ad20b35b4a70 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
@@ -67,10 +67,7 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
index 1251d2e3bba551..9823c8dbc5914a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
@@ -61,7 +61,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
-define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %offset) nounwind {
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
ret void
@@ -240,7 +240,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_v4f16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
-define amdgpu_ps void @buffer_store_v4f16(ptr addrspace(8) inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_v4f16(ptr addrspace(8) inreg %rsrc, <4 x half> %data, i32 %offset) nounwind {
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
ret void
@@ -270,7 +270,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_v4i16:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
-define amdgpu_ps void @buffer_store_v4i16(ptr addrspace(8) inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
+define amdgpu_ps void @buffer_store_v4i16(ptr addrspace(8) inreg %rsrc, <4 x i16> %data, i32 %offset) nounwind {
main_body:
call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
ret void
@@ -307,20 +307,17 @@ define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_swizzled_not_merged(ptr ad
ret void
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
index 22396631b38df3..0727492e462b21 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
@@ -279,9 +279,7 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
index 8641bf1b03f36a..9f921ee9350068 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
@@ -363,9 +363,7 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
index 4f4e9482e6ced9..bfd3e8164c51d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
@@ -3,12 +3,12 @@
; ERROR: error: <unknown>:0:0: in function rcp_legacy_f32 void (ptr addrspace(1), float): intrinsic not supported on subtarget
-declare float @llvm.amdgcn.rcp.legacy(float) #0
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
; GCN-LABEL: {{^}}rcp_legacy_f32:
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rcp_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
- %rcp = call float @llvm.amdgcn.rcp.legacy(float %src) #0
+define amdgpu_kernel void @rcp_legacy_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %rcp = call float @llvm.amdgcn.rcp.legacy(float %src) nounwind readnone
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
@@ -16,27 +16,24 @@ define amdgpu_kernel void @rcp_legacy_f32(ptr addrspace(1) %out, float %src) #1
; TODO: Really these should be constant folded
; GCN-LABEL: {{^}}rcp_legacy_f32_constant_4.0
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rcp_legacy_f32_constant_4.0(ptr addrspace(1) %out) #1 {
- %rcp = call float @llvm.amdgcn.rcp.legacy(float 4.0) #0
+define amdgpu_kernel void @rcp_legacy_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
+ %rcp = call float @llvm.amdgcn.rcp.legacy(float 4.0) nounwind readnone
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
; GCN-LABEL: {{^}}rcp_legacy_f32_constant_100.0
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rcp_legacy_f32_constant_100.0(ptr addrspace(1) %out) #1 {
- %rcp = call float @llvm.amdgcn.rcp.legacy(float 100.0) #0
+define amdgpu_kernel void @rcp_legacy_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
+ %rcp = call float @llvm.amdgcn.rcp.legacy(float 100.0) nounwind readnone
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
; GCN-LABEL: {{^}}rcp_legacy_undef_f32:
; GCN-NOT: v_rcp_legacy_f32
-define amdgpu_kernel void @rcp_legacy_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_legacy_undef_f32(ptr addrspace(1) %out) nounwind {
%rcp = call float @llvm.amdgcn.rcp.legacy(float undef)
store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index f4663e9daccc8c..e34c3fc5ebd43d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -1,18 +1,18 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
-declare float @llvm.amdgcn.rcp.f32(float) #0
-declare double @llvm.amdgcn.rcp.f64(double) #0
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
-declare double @llvm.amdgcn.sqrt.f64(double) #0
-declare float @llvm.amdgcn.sqrt.f32(float) #0
-declare double @llvm.sqrt.f64(double) #0
-declare float @llvm.sqrt.f32(float) #0
+declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone
+declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+declare float @llvm.sqrt.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
; SI-NOT: [[NAN]]
; SI: buffer_store_dword [[NAN]]
-define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = call float @llvm.amdgcn.rcp.f32(float undef)
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -21,7 +21,7 @@ define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
-define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
-define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -40,7 +40,7 @@ define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = fdiv float 1.0, %src, !fpmath !0
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -50,7 +50,7 @@ define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out,
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
+define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" {
%rcp = fdiv float 1.0, %src, !fpmath !0
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -58,7 +58,7 @@ define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
-define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #3 {
+define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" {
%rcp = fdiv float 1.0, %src
store float %rcp, ptr addrspace(1) %out, align 4
ret void
@@ -74,7 +74,7 @@ define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %ou
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_rcp_f32
-define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call contract float @llvm.sqrt.f32(float %src)
%rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, ptr addrspace(1) %out, align 4
@@ -84,7 +84,7 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %sr
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
-define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call contract float @llvm.amdgcn.sqrt.f32(float %src)
%rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, ptr addrspace(1) %out, align 4
@@ -94,7 +94,7 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32(ptr addrspace(1) %ou
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
-define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call float @llvm.amdgcn.sqrt.f32(float %src)
%rcp = call contract float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, ptr addrspace(1) %out, align 4
@@ -104,7 +104,7 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_amdgcn_sqrt_f32_nocontract(ptr addrs
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
-define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
+define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call float @llvm.sqrt.f32(float %src)
%rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
store float %rcp, ptr addrspace(1) %out, align 4
@@ -115,7 +115,7 @@ define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = call double @llvm.amdgcn.rcp.f64(double %src)
store double %rcp, ptr addrspace(1) %out, align 8
ret void
@@ -125,7 +125,7 @@ define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = call double @llvm.amdgcn.rcp.f64(double %src)
store double %rcp, ptr addrspace(1) %out, align 8
ret void
@@ -133,7 +133,7 @@ define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
-define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = fdiv double 1.0, %src
store double %rcp, ptr addrspace(1) %out, align 8
ret void
@@ -147,7 +147,7 @@ define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
-define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%rcp = fdiv double 1.0, %src
store double %rcp, ptr addrspace(1) %out, align 8
ret void
@@ -165,7 +165,7 @@ define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_rcp_f64
-define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call double @llvm.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
store double %rcp, ptr addrspace(1) %out, align 8
@@ -176,7 +176,7 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %s
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
-define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
+define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
store double %rcp, ptr addrspace(1) %out, align 8
@@ -195,7 +195,7 @@ define amdgpu_kernel void @safe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %ou
; SI: v_fma_f64
; SI: v_rcp_f64
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call double @llvm.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
store double %rcp, ptr addrspace(1) %out, align 8
@@ -206,17 +206,11 @@ define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double
; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_amdgcn_sqrt_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%sqrt = call double @llvm.amdgcn.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
store double %rcp, ptr addrspace(1) %out, align 8
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #3 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="ieee,ieee" }
-attributes #4 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="ieee,ieee" }
-
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 0284f44f5f14d4..204beb8a3e7ff1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -1,10 +1,10 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+declare i32 @llvm.amdgcn.readfirstlane(i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}test_readfirstlane:
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v2
-define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
+define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) nounwind {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
@@ -14,7 +14,7 @@ define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
; CHECK: s_mov_b32 [[SGPR_VAL:s[0-9]]], 32
; CHECK-NOT: [[SGPR_VAL]]
; CHECK: ; use [[SGPR_VAL]]
-define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) nounwind {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
@@ -24,7 +24,7 @@ define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) #1 {
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32
; CHECK-NOT: [[VVAL]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) nounwind {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
@@ -34,7 +34,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) #1
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) nounwind {
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
; CHECK-NOT: readfirstlane
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %out) nounwind {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %sgpr)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
@@ -59,13 +59,10 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %o
; Make sure this doesn't crash.
; CHECK-LABEL: {{^}}test_readfirstlane_fi:
; CHECK: s_mov_b32 [[FIVAL:s[0-9]]], 0
-define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) nounwind {
%alloca = alloca i32, addrspace(5)
%int = ptrtoint ptr addrspace(5) %alloca to i32
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
}
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index 51465f6bd10ce5..bb38fd0adbc687 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -1,10 +1,10 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
-declare i32 @llvm.amdgcn.readlane(i32, i32) #0
+declare i32 @llvm.amdgcn.readlane(i32, i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}test_readlane_sreg_sreg:
; CHECK-NOT: v_readlane_b32
-define amdgpu_kernel void @test_readlane_sreg_sreg(i32 %src0, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_sreg_sreg(i32 %src0, i32 %src1) nounwind {
%readlane = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 %src1)
call void asm sideeffect "; use $0", "s"(i32 %readlane)
ret void
@@ -12,7 +12,7 @@ define amdgpu_kernel void @test_readlane_sreg_sreg(i32 %src0, i32 %src1) #1 {
; CHECK-LABEL: {{^}}test_readlane_vreg_sreg:
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_readlane_vreg_sreg(i32 %src0, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_vreg_sreg(i32 %src0, i32 %src1) nounwind {
%vgpr = call i32 asm sideeffect "; def $0", "=v"()
%readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 %src1)
call void asm sideeffect "; use $0", "s"(i32 %readlane)
@@ -21,7 +21,7 @@ define amdgpu_kernel void @test_readlane_vreg_sreg(i32 %src0, i32 %src1) #1 {
; CHECK-LABEL: {{^}}test_readlane_imm_sreg:
; CHECK-NOT: v_readlane_b32
-define amdgpu_kernel void @test_readlane_imm_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_imm_sreg(ptr addrspace(1) %out, i32 %src1) nounwind {
%readlane = call i32 @llvm.amdgcn.readlane(i32 32, i32 %src1)
store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @test_readlane_imm_sreg(ptr addrspace(1) %out, i32 %sr
; CHECK-LABEL: {{^}}test_readlane_vregs:
; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]]
-define amdgpu_kernel void @test_readlane_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_readlane_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
%args = load <2 x i32>, ptr addrspace(1) %gep.in
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_readlane_vregs(ptr addrspace(1) %out, ptr addrsp
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readlane_m0_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_m0_sreg(ptr addrspace(1) %out, i32 %src1) nounwind {
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%readlane = call i32 @llvm.amdgcn.readlane(i32 %m0, i32 %src1)
store i32 %readlane, ptr addrspace(1) %out, align 4
@@ -55,9 +55,9 @@ define amdgpu_kernel void @test_readlane_m0_sreg(ptr addrspace(1) %out, i32 %src
; CHECK-LABEL: {{^}}test_readlane_vgpr_imm:
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 32
-define amdgpu_kernel void @test_readlane_vgpr_imm(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readlane_vgpr_imm(ptr addrspace(1) %out) nounwind {
%vgpr = call i32 asm sideeffect "; def $0", "=v"()
- %readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 32) #0
+ %readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 32) nounwind readnone convergent
store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
}
@@ -70,15 +70,11 @@ define amdgpu_kernel void @test_readlane_vgpr_imm(ptr addrspace(1) %out) #1 {
; CHECK-NOT: readlane
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
-define amdgpu_kernel void @test_readlane_copy_from_sgpr(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readlane_copy_from_sgpr(ptr addrspace(1) %out) nounwind {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
%readfirstlane = call i32 @llvm.amdgcn.readlane(i32 %sgpr, i32 7)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 1cc592b74b5953..22e42d8809f024 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
-declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
-declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
+declare float @llvm.amdgcn.rsq.clamp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rsq.clamp.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamp_f32:
; SI: v_rsq_clamp_f32_e32
@@ -12,7 +12,7 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
; VI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @rsq_clamp_f32(ptr addrspace(1) %out, float %src) #0 {
+define amdgpu_kernel void @rsq_clamp_f32(ptr addrspace(1) %out, float %src) nounwind {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
store float %rsq_clamp, ptr addrspace(1) %out
ret void
@@ -30,7 +30,7 @@ define amdgpu_kernel void @rsq_clamp_f32(ptr addrspace(1) %out, float %src) #0 {
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s[[[LOW1]]:[[HIGH1]]]
; VI-DAG: v_max_f64 v[0:1], v[0:1], s[[[LOW1]]:[[HIGH2]]]
-define amdgpu_kernel void @rsq_clamp_f64(ptr addrspace(1) %out, double %src) #0 {
+define amdgpu_kernel void @rsq_clamp_f64(ptr addrspace(1) %out, double %src) nounwind {
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
store double %rsq_clamp, ptr addrspace(1) %out
ret void
@@ -38,11 +38,8 @@ define amdgpu_kernel void @rsq_clamp_f64(ptr addrspace(1) %out, double %src) #0
; FUNC-LABEL: {{^}}rsq_clamp_undef_f32:
; SI-NOT: v_rsq_clamp_f32
-define amdgpu_kernel void @rsq_clamp_undef_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @rsq_clamp_undef_f32(ptr addrspace(1) %out) nounwind {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
store float %rsq_clamp, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
index 1ceca55440ada5..968e755c25db3f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
@@ -1,11 +1,11 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare float @llvm.amdgcn.rsq.legacy(float) #0
+declare float @llvm.amdgcn.rsq.legacy(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_legacy_f32:
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
- %rsq = call float @llvm.amdgcn.rsq.legacy(float %src) #0
+define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float %src) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
@@ -13,27 +13,24 @@ define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1
; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_4.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rsq_legacy_f32_constant_4.0(ptr addrspace(1) %out) #1 {
- %rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) #0
+define amdgpu_kernel void @rsq_legacy_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_100.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rsq_legacy_f32_constant_100.0(ptr addrspace(1) %out) #1 {
- %rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) #0
+define amdgpu_kernel void @rsq_legacy_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_legacy_undef_f32:
; SI-NOT: v_rsq_legacy_f32
-define amdgpu_kernel void @rsq_legacy_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rsq_legacy_undef_f32(ptr addrspace(1) %out) nounwind {
%rsq = call float @llvm.amdgcn.rsq.legacy(float undef)
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
index 1b9086efb19a9d..47fc5152da514d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
@@ -1,13 +1,13 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-declare float @llvm.amdgcn.rsq.f32(float) #0
-declare double @llvm.amdgcn.rsq.f64(double) #0
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rsq.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %out, float %src) #1 {
- %rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0
+define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float %src) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
@@ -15,24 +15,24 @@ define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %out, float %src) #1 {
; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rsq_f32_constant_4.0(ptr addrspace(1) %out) #1 {
- %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0
+define amdgpu_kernel void @rsq_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rsq_f32_constant_100.0(ptr addrspace(1) %out) #1 {
- %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0
+define amdgpu_kernel void @rsq_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) nounwind readnone
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_f64:
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @rsq_f64(ptr addrspace(1) %out, double %src) #1 {
- %rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0
+define amdgpu_kernel void @rsq_f64(ptr addrspace(1) %out, double %src) nounwind {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double %src) nounwind readnone
store double %rsq, ptr addrspace(1) %out, align 4
ret void
}
@@ -40,27 +40,24 @@ define amdgpu_kernel void @rsq_f64(ptr addrspace(1) %out, double %src) #1 {
; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f64_constant_4.0
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0
-define amdgpu_kernel void @rsq_f64_constant_4.0(ptr addrspace(1) %out) #1 {
- %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0
+define amdgpu_kernel void @rsq_f64_constant_4.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) nounwind readnone
store double %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_f64_constant_100.0
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 0x40590000
-define amdgpu_kernel void @rsq_f64_constant_100.0(ptr addrspace(1) %out) #1 {
- %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0
+define amdgpu_kernel void @rsq_f64_constant_100.0(ptr addrspace(1) %out) nounwind {
+ %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) nounwind readnone
store double %rsq, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}rsq_undef_f32:
; SI-NOT: v_rsq_f32
-define amdgpu_kernel void @rsq_undef_f32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rsq_undef_f32(ptr addrspace(1) %out) nounwind {
%rsq = call float @llvm.amdgcn.rsq.f32(float undef)
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index a7d3115af29bff..0ad0b7911eb228 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -7,7 +7,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT5 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT6 %s
-define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 {
+define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) nounwind {
; VARIANT0-LABEL: test_barrier:
; VARIANT0: ; %bb.0: ; %entry
; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -169,9 +169,5 @@ entry:
ret void
}
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind readnone }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
index 4ab5e97964a857..4d17aa3a9d51ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GLOBAL-ISEL %s
-define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_signal(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_signal:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -46,7 +46,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test2_s_barrier_signal(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test2_s_barrier_signal:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -90,7 +90,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test3_s_barrier_signal(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test3_s_barrier_signal:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -134,7 +134,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_signal_var(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_signal_var:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -209,7 +209,7 @@ define void @test2_s_barrier_signal_var(i32 %arg) {
ret void
}
-define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_signal_isfirst:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
@@ -263,7 +263,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test2_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test2_s_barrier_signal_isfirst:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
@@ -317,7 +317,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test3_s_barrier_signal_isfirst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test3_s_barrier_signal_isfirst:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
@@ -371,7 +371,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_signal_isfirst_var:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
@@ -495,7 +495,7 @@ define void @test2_s_barrier_signal_isfirst_var(ptr addrspace(1) %a, ptr addrspa
ret void
}
-define amdgpu_kernel void @test1_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) #0 {
+define amdgpu_kernel void @test1_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) nounwind {
; GCN-LABEL: test1_s_barrier_init:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -539,7 +539,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) #0 {
+define amdgpu_kernel void @test2_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) nounwind {
; GCN-LABEL: test2_s_barrier_init:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -583,7 +583,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) #0 {
+define amdgpu_kernel void @test3_s_barrier_init(ptr addrspace(1) %out, i32 %mbrCnt) nounwind {
; GCN-LABEL: test3_s_barrier_init:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -627,7 +627,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test4_s_barrier_init(ptr addrspace(1) %out, i32 %bar, i32 %mbrCnt) #0 {
+define amdgpu_kernel void @test4_s_barrier_init(ptr addrspace(1) %out, i32 %bar, i32 %mbrCnt) nounwind {
; GCN-LABEL: test4_s_barrier_init:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -709,7 +709,7 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) {
ret void
}
-define amdgpu_kernel void @test1_s_barrier_join(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_join(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_join:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -749,7 +749,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_barrier_join(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test2_s_barrier_join(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test2_s_barrier_join:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -789,7 +789,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_barrier_join(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test3_s_barrier_join(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test3_s_barrier_join:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -829,7 +829,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test4_s_barrier_join_m0(ptr addrspace(1) %out, i32 %bar) #0 {
+define amdgpu_kernel void @test4_s_barrier_join_m0(ptr addrspace(1) %out, i32 %bar) nounwind {
; GCN-LABEL: test4_s_barrier_join_m0:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -901,7 +901,7 @@ define void @test5_s_barrier_join_m0(i32 %arg) {
ret void
}
-define amdgpu_kernel void @test1_s_barrier_leave(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_barrier_leave(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_barrier_leave:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
@@ -955,7 +955,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test1_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_wakeup_barrier(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_wakeup_barrier:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -995,7 +995,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test2_s_wakeup_barrier(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test2_s_wakeup_barrier:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -1035,7 +1035,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test3_s_wakeup_barrier(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test3_s_wakeup_barrier:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -1075,7 +1075,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test4_s_wakeup_barrier_m0(ptr addrspace(1) %out, i32 %bar) #0 {
+define amdgpu_kernel void @test4_s_wakeup_barrier_m0(ptr addrspace(1) %out, i32 %bar) nounwind {
; GCN-LABEL: test4_s_wakeup_barrier_m0:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -1147,7 +1147,7 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) {
ret void
}
-define amdgpu_kernel void @test1_s_get_barrier_state(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test1_s_get_barrier_state(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test1_s_get_barrier_state:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_get_barrier_state s2, -1
@@ -1183,7 +1183,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test2_s_get_barrier_state(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test2_s_get_barrier_state(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test2_s_get_barrier_state:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_get_barrier_state s2, 1
@@ -1219,7 +1219,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test3_s_get_barrier_state(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test3_s_get_barrier_state(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test3_s_get_barrier_state:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_get_barrier_state s2, 0
@@ -1255,7 +1255,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test4_s_get_barrier_state_m0(ptr addrspace(1) %out, i32 %bar) #0 {
+define amdgpu_kernel void @test4_s_get_barrier_state_m0(ptr addrspace(1) %out, i32 %bar) nounwind {
; GCN-LABEL: test4_s_get_barrier_state_m0:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
@@ -1329,7 +1329,7 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) {
ret i32 %state
}
-define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: test_barrier_convert:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -1371,20 +1371,16 @@ entry:
store i32 %tmp4, ptr addrspace(1) %tmp1
ret void
}
-declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.s.barrier.wait(i16) #1
-declare void @llvm.amdgcn.s.barrier.signal(i32) #1
-declare void @llvm.amdgcn.s.barrier.signal.var(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32) #1
-declare void @llvm.amdgcn.s.barrier.init(i32, i32) #1
-declare void @llvm.amdgcn.s.barrier.join(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.leave() #1
-declare void @llvm.amdgcn.s.wakeup.barrier(i32) #1
-declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
-declare i32 @llvm.amdgcn.s.get.barrier.state.var(i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind readnone }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
+declare void @llvm.amdgcn.s.barrier.wait(i16) convergent nounwind
+declare void @llvm.amdgcn.s.barrier.signal(i32) convergent nounwind
+declare void @llvm.amdgcn.s.barrier.signal.var(i32) convergent nounwind
+declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) convergent nounwind
+declare i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32) convergent nounwind
+declare void @llvm.amdgcn.s.barrier.init(i32, i32) convergent nounwind
+declare void @llvm.amdgcn.s.barrier.join(i32) convergent nounwind
+declare i1 @llvm.amdgcn.s.barrier.leave() convergent nounwind
+declare void @llvm.amdgcn.s.wakeup.barrier(i32) convergent nounwind
+declare i32 @llvm.amdgcn.s.get.barrier.state(i32) convergent nounwind
+declare i32 @llvm.amdgcn.s.get.barrier.state.var(i32) convergent nounwind
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
index 12388878f05c24..a570acca931531 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
@@ -1,15 +1,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-declare void @llvm.amdgcn.s.dcache.inv() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
+declare void @llvm.amdgcn.s.dcache.inv() nounwind
+declare void @llvm.amdgcn.s.waitcnt(i32) nounwind
; GCN-LABEL: {{^}}test_s_dcache_inv:
; GCN-NEXT: ; %bb.0:
; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7]
; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00]
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_s_dcache_inv() #0 {
+define amdgpu_kernel void @test_s_dcache_inv() nounwind {
call void @llvm.amdgcn.s.dcache.inv()
ret void
}
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_inv() #0 {
; GCN-NEXT: ; %bb.0:
; GCN: s_dcache_inv
; GCN: s_waitcnt lgkmcnt(0) ; encoding
-define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 {
+define amdgpu_kernel void @test_s_dcache_inv_insert_wait() nounwind {
call void @llvm.amdgcn.s.dcache.inv()
call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
@@ -27,5 +27,3 @@ end:
store volatile i32 3, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
index 57f8f6073f1cde..f03b118d2cbefa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
@@ -1,15 +1,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-declare void @llvm.amdgcn.s.dcache.inv.vol() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
+declare void @llvm.amdgcn.s.dcache.inv.vol() nounwind
+declare void @llvm.amdgcn.s.waitcnt(i32) nounwind
; GCN-LABEL: {{^}}test_s_dcache_inv_vol:
; GCN-NEXT: ; %bb.0:
; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00]
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @test_s_dcache_inv_vol() #0 {
+define amdgpu_kernel void @test_s_dcache_inv_vol() nounwind {
call void @llvm.amdgcn.s.dcache.inv.vol()
ret void
}
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 {
; GCN-NEXT: ; %bb.0:
; GCN: s_dcache_inv_vol
; GCN: s_waitcnt lgkmcnt(0) ; encoding
-define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 {
+define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() nounwind {
call void @llvm.amdgcn.s.dcache.inv.vol()
call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
@@ -27,5 +27,3 @@ end:
store volatile i32 3, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
index 4bdb0ee7ee4c85..20811d272948c1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
@@ -1,13 +1,13 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
-declare void @llvm.amdgcn.s.dcache.wb() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
+declare void @llvm.amdgcn.s.dcache.wb() nounwind
+declare void @llvm.amdgcn.s.waitcnt(i32) nounwind
; VI-LABEL: {{^}}test_s_dcache_wb:
; VI-NEXT: ; %bb.0:
; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_endpgm
-define amdgpu_kernel void @test_s_dcache_wb() #0 {
+define amdgpu_kernel void @test_s_dcache_wb() nounwind {
call void @llvm.amdgcn.s.dcache.wb()
ret void
}
@@ -16,7 +16,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 {
; VI-NEXT: ; %bb.0:
; VI: s_dcache_wb
; VI: s_waitcnt lgkmcnt(0) ; encoding
-define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 {
+define amdgpu_kernel void @test_s_dcache_wb_insert_wait() nounwind {
call void @llvm.amdgcn.s.dcache.wb()
call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
@@ -25,5 +25,3 @@ end:
store volatile i32 3, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
index b137cece5e6170..f7608713a8c961 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
@@ -1,13 +1,13 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
-declare void @llvm.amdgcn.s.dcache.wb.vol() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
+declare void @llvm.amdgcn.s.dcache.wb.vol() nounwind
+declare void @llvm.amdgcn.s.waitcnt(i32) nounwind
; VI-LABEL: {{^}}test_s_dcache_wb_vol:
; VI-NEXT: ; %bb.0:
; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_endpgm
-define amdgpu_kernel void @test_s_dcache_wb_vol() #0 {
+define amdgpu_kernel void @test_s_dcache_wb_vol() nounwind {
call void @llvm.amdgcn.s.dcache.wb.vol()
ret void
}
@@ -16,7 +16,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 {
; VI-NEXT: ; %bb.0:
; VI: s_dcache_wb_vol
; VI: s_waitcnt lgkmcnt(0) ; encoding
-define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 {
+define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() nounwind {
call void @llvm.amdgcn.s.dcache.wb.vol()
call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
@@ -25,5 +25,3 @@ end:
store volatile i32 3, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
index 69ed9d553667f6..d38adf982fe5a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.amdgcn.s.decperflevel(i32) #0
+declare void @llvm.amdgcn.s.decperflevel(i32) nounwind
; GCN-LABEL: {{^}}test_s_decperflevel:
; GCN: s_decperflevel 0{{$}}
@@ -20,7 +20,7 @@ declare void @llvm.amdgcn.s.decperflevel(i32) #0
; GCN: s_decperflevel 13{{$}}
; GCN: s_decperflevel 14{{$}}
; GCN: s_decperflevel 15{{$}}
-define amdgpu_kernel void @test_s_decperflevel(i32 %x) #0 {
+define amdgpu_kernel void @test_s_decperflevel(i32 %x) nounwind {
call void @llvm.amdgcn.s.decperflevel(i32 0)
call void @llvm.amdgcn.s.decperflevel(i32 1)
call void @llvm.amdgcn.s.decperflevel(i32 2)
@@ -39,5 +39,3 @@ define amdgpu_kernel void @test_s_decperflevel(i32 %x) #0 {
call void @llvm.amdgcn.s.decperflevel(i32 15)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
index ffab3449f7cb97..76954141813954 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0
+declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() nounwind
; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup:
; GFX10: global_store_dword
@@ -15,5 +15,3 @@ define amdgpu_kernel void @test_s_get_waveid_in_workgroup(ptr addrspace(1) %out)
store i32 %v, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
index 77bea2f4b4d508..27603f62cdf535 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
@@ -1,15 +1,13 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i64 @llvm.amdgcn.s.getpc() #0
+declare i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable
; GCN-LABEL: {{^}}test_s_getpc:
; GCN: s_load_dwordx2
; GCN-DAG: s_getpc_b64 s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @test_s_getpc(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_s_getpc(ptr addrspace(1) %out) nounwind readnone speculatable {
-%tmp = call i64 @llvm.amdgcn.s.getpc() #1
+%tmp = call i64 @llvm.amdgcn.s.getpc()
store volatile i64 %tmp, ptr addrspace(1) %out, align 8
ret void
}
-
-attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
index 52bdfbd62680f3..b69e561a12eae6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
@@ -20,13 +20,10 @@ define amdgpu_kernel void @s_getreg_test(ptr addrspace(1) %out) { ; simm16=45574
; GCN-LABEL: {{^}}readnone_s_getreg_test:
; GCN: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_LDS_ALLOC, 8, 23)
define amdgpu_kernel void @readnone_s_getreg_test(ptr addrspace(1) %out) { ; simm16=45574 for lds size.
- %lds_size_64dwords = call i32 @llvm.amdgcn.s.getreg(i32 45574) #1
+ %lds_size_64dwords = call i32 @llvm.amdgcn.s.getreg(i32 45574) nounwind readnone
%lds_size_bytes = shl i32 %lds_size_64dwords, 8
store i32 %lds_size_bytes, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.s.getreg(i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.s.getreg(i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
index 3e35593f817458..7a2832191fd46f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.amdgcn.s.incperflevel(i32) #0
+declare void @llvm.amdgcn.s.incperflevel(i32) nounwind
; GCN-LABEL: {{^}}test_s_incperflevel:
; GCN: s_incperflevel 0{{$}}
@@ -20,7 +20,7 @@ declare void @llvm.amdgcn.s.incperflevel(i32) #0
; GCN: s_incperflevel 13{{$}}
; GCN: s_incperflevel 14{{$}}
; GCN: s_incperflevel 15{{$}}
-define amdgpu_kernel void @test_s_incperflevel(i32 %x) #0 {
+define amdgpu_kernel void @test_s_incperflevel(i32 %x) nounwind {
call void @llvm.amdgcn.s.incperflevel(i32 0)
call void @llvm.amdgcn.s.incperflevel(i32 1)
call void @llvm.amdgcn.s.incperflevel(i32 2)
@@ -39,5 +39,3 @@ define amdgpu_kernel void @test_s_incperflevel(i32 %x) #0 {
call void @llvm.amdgcn.s.incperflevel(i32 15)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
index 82468b6a2076b3..a6ce57f495e288 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
@@ -3,7 +3,7 @@
; ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.s.memrealtime
-declare i64 @llvm.amdgcn.s.memrealtime() #0
+declare i64 @llvm.amdgcn.s.memrealtime() nounwind
; GCN-LABEL: {{^}}test_s_memrealtime:
; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
@@ -13,7 +13,7 @@ declare i64 @llvm.amdgcn.s.memrealtime() #0
; GCN-NOT: lgkmcnt
; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
; GCN: _store_dwordx2
-define amdgpu_kernel void @test_s_memrealtime(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_s_memrealtime(ptr addrspace(1) %out) nounwind {
%cycle0 = call i64 @llvm.amdgcn.s.memrealtime()
store volatile i64 %cycle0, ptr addrspace(1) %out
@@ -21,5 +21,3 @@ define amdgpu_kernel void @test_s_memrealtime(ptr addrspace(1) %out) #0 {
store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
index 1d7edb2fd9a4e1..6bc380e36cd688 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i64 @llvm.amdgcn.s.memtime() #0
+declare i64 @llvm.amdgcn.s.memtime() nounwind
; GCN-LABEL: {{^}}test_s_memtime:
; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
@@ -13,7 +13,7 @@ declare i64 @llvm.amdgcn.s.memtime() #0
; SIVI-NOT: lgkmcnt
; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}}
; GCN: {{buffer|global}}_store_dwordx2
-define amdgpu_kernel void @test_s_memtime(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_s_memtime(ptr addrspace(1) %out) nounwind {
%cycle0 = call i64 @llvm.amdgcn.s.memtime()
store volatile i64 %cycle0, ptr addrspace(1) %out
@@ -21,5 +21,3 @@ define amdgpu_kernel void @test_s_memtime(ptr addrspace(1) %out) #0 {
store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
index 087f798b58ff84..918d00d581d1aa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
@@ -4,9 +4,9 @@
; RUN: llc -global-isel -mtriple=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
-declare void @llvm.amdgcn.s.setprio(i16) #0
+declare void @llvm.amdgcn.s.setprio(i16) nounwind
-define void @test_llvm_amdgcn_s_setprio() #0 {
+define void @test_llvm_amdgcn_s_setprio() nounwind {
; GFX9-LABEL: test_llvm_amdgcn_s_setprio:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
@@ -45,5 +45,3 @@ define void @test_llvm_amdgcn_s_setprio() #0 {
call void @llvm.amdgcn.s.setprio(i16 -1)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index 05186ac2aa28f6..8bcd264eddc44b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -1506,6 +1506,4 @@ define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
; ret void
; }
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
index e3a577ebe49f76..bc26c935e9d9b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.amdgcn.s.sleep(i32) #0
+declare void @llvm.amdgcn.s.sleep(i32) nounwind
; GCN-LABEL: {{^}}test_s_sleep:
; GCN: s_sleep 0{{$}}
@@ -20,7 +20,7 @@ declare void @llvm.amdgcn.s.sleep(i32) #0
; GCN: s_sleep 13{{$}}
; GCN: s_sleep 14{{$}}
; GCN: s_sleep 15{{$}}
-define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
+define amdgpu_kernel void @test_s_sleep(i32 %x) nounwind {
call void @llvm.amdgcn.s.sleep(i32 0)
call void @llvm.amdgcn.s.sleep(i32 1)
call void @llvm.amdgcn.s.sleep(i32 2)
@@ -41,5 +41,3 @@ define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
call void @llvm.amdgcn.s.sleep(i32 15)
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
index 08c77148f6ae13..f2c0683851d33a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
@@ -7,12 +7,10 @@
; GFX11: s_wait_event 0x0
; GFX12: s_wait_event 0x1
-define amdgpu_ps void @test_wait_event() #0 {
+define amdgpu_ps void @test_wait_event() nounwind {
entry:
- call void @llvm.amdgcn.s.wait.event.export.ready() #0
+ call void @llvm.amdgcn.s.wait.event.export.ready() nounwind
ret void
}
-declare void @llvm.amdgcn.s.wait.event.export.ready() #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.s.wait.event.export.ready() nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
index efaf47208b1f1b..349fec242bdd26 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -43,10 +43,7 @@ define amdgpu_ps void @test3(<8 x i32> inreg %rsrc, i32 %c) {
ret void
}
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
+declare void @llvm.amdgcn.s.waitcnt(i32) nounwind
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
index 87c92137cddee3..b7e1c0fe0e053b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_sad_hi_u8:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_hi_u8(ptr addrspace(1) %out, i32 %src) {
- %result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 100, i32 100) #0
+ %result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 100, i32 100) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,9 +14,7 @@ define amdgpu_kernel void @v_sad_hi_u8(ptr addrspace(1) %out, i32 %src) {
; GCN-LABEL: {{^}}v_sad_hi_u8_non_immediate:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_hi_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
- %result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 %a, i32 %b) #0
+ %result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 %a, i32 %b) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
index 2c013cc35ad523..4a9a3be663a600 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_sad_u16:
; GCN: v_sad_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u16(ptr addrspace(1) %out, i32 %src) {
- %result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 100, i32 100) #0
+ %result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 100, i32 100) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,9 +14,7 @@ define amdgpu_kernel void @v_sad_u16(ptr addrspace(1) %out, i32 %src) {
; GCN-LABEL: {{^}}v_sad_u16_non_immediate:
; GCN: v_sad_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u16_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
- %result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 %a, i32 %b) #0
+ %result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 %a, i32 %b) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
index 84b663a508a423..563bd253b2951b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_sad_u8:
; GCN: v_sad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
define amdgpu_kernel void @v_sad_u8(ptr addrspace(1) %out, i32 %src) {
- %result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 100, i32 100) #0
+ %result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 100, i32 100) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,9 +14,7 @@ define amdgpu_kernel void @v_sad_u8(ptr addrspace(1) %out, i32 %src) {
; GCN-LABEL: {{^}}v_sad_u8_non_immediate:
; GCN: v_sad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_sad_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
- %result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 %a, i32 %b) #0
+ %result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 %a, i32 %b) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
index 25b36173cc65b5..276159e6eb82bd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -11,7 +11,7 @@ define amdgpu_kernel void @bfe_i32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0,
; GCN-LABEL: {{^}}bfe_i32_arg_arg_imm:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -19,7 +19,7 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
; GCN-LABEL: {{^}}bfe_i32_arg_imm_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -27,7 +27,7 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
; GCN-LABEL: {{^}}bfe_i32_imm_arg_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -35,7 +35,7 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
; GCN-LABEL: {{^}}v_bfe_print_arg:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
-define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 {
+define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) nounwind {
%load = load i32, ptr addrspace(1) %src0, align 4
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
@@ -45,7 +45,7 @@ define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(
; GCN-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
store i32 %bfe_u32, ptr addrspace(1) %out, align 4
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out,
; GCN-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
store i32 %bfe_u32, ptr addrspace(1) %out, align 4
ret void
@@ -64,7 +64,7 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out,
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
@@ -78,7 +78,7 @@ define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
@@ -90,7 +90,7 @@ define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1
; GCN: buffer_load_dword
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
@@ -103,7 +103,7 @@ define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
store i32 %bfe, ptr addrspace(1) %out, align 4
@@ -115,7 +115,7 @@ define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
store i32 %bfe, ptr addrspace(1) %out, align 4
@@ -127,7 +127,7 @@ define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
store i32 %bfe, ptr addrspace(1) %out, align 4
@@ -139,7 +139,7 @@ define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
store i32 %bfe, ptr addrspace(1) %out, align 4
@@ -150,7 +150,7 @@ define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(
; GCN: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
@@ -161,7 +161,7 @@ define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
@@ -173,7 +173,7 @@ define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -184,7 +184,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -195,7 +195,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -206,7 +206,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -217,7 +217,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -228,7 +228,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -239,7 +239,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -250,7 +250,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -261,7 +261,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -272,7 +272,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -283,7 +283,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -294,7 +294,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -305,7 +305,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -316,7 +316,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -327,7 +327,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -338,7 +338,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -349,7 +349,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -360,7 +360,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -371,7 +371,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) nounwind {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
@@ -383,7 +383,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out)
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; GCN: buffer_store_dword [[BFE]],
-define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
%shl = shl i32 %bfe, 8
@@ -399,7 +399,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrsp
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[BFE]], [[TMP0]]
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; GCN: buffer_store_dword [[TMP2]]
-define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%src = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
%div = sdiv i32 %bfe, 2
@@ -410,7 +410,7 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr
; GCN-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i32, ptr addrspace(1) %ptr, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
store i32 %bfe, ptr addrspace(1) %out, align 4
@@ -421,7 +421,7 @@ define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
@@ -432,7 +432,7 @@ define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
@@ -445,7 +445,7 @@ define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
@@ -458,7 +458,7 @@ define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
%shl = shl i32 %bfe, 24
@@ -468,7 +468,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32
}
; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
%shl = shl i32 %bfe, 24
@@ -481,7 +481,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i8, ptr addrspace(1) %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
@@ -495,7 +495,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr add
; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) nounwind {
%load = load i8, ptr addrspace(1) %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0)
@@ -510,7 +510,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr a
; GCN-NOT: shl
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
-define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
@@ -525,7 +525,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, pt
; GCN-NOT: shr
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; GCN: s_endpgm
-define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
@@ -541,7 +541,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, pt
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; GCN: s_endpgm
-define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
@@ -550,7 +550,4 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, pt
ret void
}
-declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
index 95e3446bed081b..e65e7b81c58cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_kernel void @test_sched_barrier() #0 {
+define amdgpu_kernel void @test_sched_barrier() nounwind {
; GCN-LABEL: test_sched_barrier:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: ; sched_barrier mask(0x00000000)
@@ -10,14 +10,11 @@ define amdgpu_kernel void @test_sched_barrier() #0 {
; GCN-NEXT: ; sched_barrier mask(0x0000000F)
; GCN-NEXT: s_endpgm
entry:
- call void @llvm.amdgcn.sched.barrier(i32 0) #1
- call void @llvm.amdgcn.sched.barrier(i32 1) #1
- call void @llvm.amdgcn.sched.barrier(i32 4) #1
- call void @llvm.amdgcn.sched.barrier(i32 15) #1
+ call void @llvm.amdgcn.sched.barrier(i32 0) convergent nounwind
+ call void @llvm.amdgcn.sched.barrier(i32 1) convergent nounwind
+ call void @llvm.amdgcn.sched.barrier(i32 4) convergent nounwind
+ call void @llvm.amdgcn.sched.barrier(i32 15) convergent nounwind
ret void
}
-declare void @llvm.amdgcn.sched.barrier(i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.sched.barrier(i32) convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
index c2e74eb05d1645..9b0dc1d4db1131 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -misched-cluster=0 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -misched-cluster=0 -amdgpu-igrouplp-exact-solver-max-branches=250000 < %s | FileCheck -check-prefix=EXACTCUTOFF %s
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_cluster(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,32" {
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_cluster:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -172,7 +172,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_WMMA_interleave(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,32" {
; GCN-LABEL: test_sched_group_barrier_pipeline_WMMA_interleave:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -394,10 +394,6 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) #1
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) #1
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,32" }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) nounwind
+declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
index 10f09b6390abae..c3e17cf3e5b806 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -misched-cluster=0 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -misched-cluster=0 -amdgpu-igrouplp-exact-solver-max-branches=250000 < %s | FileCheck -check-prefix=EXACTCUTOFF %s
-define amdgpu_kernel void @test_sched_group_barrier() #0 {
+define amdgpu_kernel void @test_sched_group_barrier() nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: ; sched_group_barrier mask(0x00000000) size(1) SyncID(2)
@@ -19,14 +19,14 @@ define amdgpu_kernel void @test_sched_group_barrier() #0 {
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x0000000F) size(10000) SyncID(-1)
; EXACTCUTOFF-NEXT: s_endpgm
entry:
- call void @llvm.amdgcn.sched.group.barrier(i32 0, i32 1, i32 2) #1
- call void @llvm.amdgcn.sched.group.barrier(i32 1, i32 2, i32 4) #1
- call void @llvm.amdgcn.sched.group.barrier(i32 4, i32 8, i32 16) #1
- call void @llvm.amdgcn.sched.group.barrier(i32 15, i32 10000, i32 -1) #1
+ call void @llvm.amdgcn.sched.group.barrier(i32 0, i32 1, i32 2) nounwind
+ call void @llvm.amdgcn.sched.group.barrier(i32 1, i32 2, i32 4) nounwind
+ call void @llvm.amdgcn.sched.group.barrier(i32 4, i32 8, i32 16) nounwind
+ call void @llvm.amdgcn.sched.group.barrier(i32 15, i32 10000, i32 -1) nounwind
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_READ_VALU_WRITE:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -160,7 +160,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(ptr
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000002) size(30) SyncID(0)
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(8) SyncID(0)
; EXACTCUTOFF-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid
%load = load <32 x i32>, ptr addrspace(1) %gep1
%mul = mul <32 x i32> %load, %load
@@ -175,7 +175,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(ptr
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_alternating_READ_VALU:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -335,7 +335,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000002) size(2) SyncID(0)
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(8) SyncID(0)
; EXACTCUTOFF-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid
%load = load <32 x i32>, ptr addrspace(1) %gep1
%mul = mul <32 x i32> %load, %load
@@ -378,7 +378,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -554,7 +554,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000002) size(2) SyncID(0)
; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(1) SyncID(0)
; EXACTCUTOFF-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
%gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid
%load = load <32 x i32>, ptr addrspace(1) %gep1
%mul = mul <32 x i32> %load, %load
@@ -611,7 +611,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_cluster:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -859,7 +859,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1185,7 +1185,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out, <5 x float> %in1) #0 {
+define amdgpu_kernel void @test_sched_group_barrier_pipeline_interleave_EXP_MFMA(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out, <5 x float> %in1) nounwind "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: test_sched_group_barrier_pipeline_interleave_EXP_MFMA:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x44
@@ -1613,11 +1613,7 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) #1
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) #1
-declare float @llvm.exp.f32(float) #2
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) nounwind
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) nounwind
+declare float @llvm.exp.f32(float) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
index fdd457ca992ea8..1b138666d9d148 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -69,7 +69,7 @@ body:
; VIPLUS-NEXT: s_nop 0
; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GCN-NEXT: s_endpgm
-define amdgpu_gs void @sendmsg(i32 inreg %a) #0 {
+define amdgpu_gs void @sendmsg(i32 inreg %a) nounwind {
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a)
ret void
}
@@ -79,7 +79,7 @@ define amdgpu_gs void @sendmsg(i32 inreg %a) #0 {
; VIPLUS-NEXT: s_nop 0
; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT)
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @sendmsghalt(i32 inreg %a) #0 {
+define amdgpu_kernel void @sendmsghalt(i32 inreg %a) nounwind {
call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a)
ret void
}
@@ -149,7 +149,7 @@ body:
; GCN-LABEL: {{^}}if_sendmsg:
; GCN: s_cbranch_execz
; GCN: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
-define amdgpu_gs void @if_sendmsg(i32 %flag) #0 {
+define amdgpu_gs void @if_sendmsg(i32 %flag) nounwind {
%cc = icmp eq i32 %flag, 0
br i1 %cc, label %sendmsg, label %end
@@ -161,7 +161,5 @@ end:
ret void
}
-declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
-declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) nounwind
+declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index b3acd4949301e1..ebb9d7215d1165 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -48,7 +48,7 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX10_W64-NEXT: s_not_b64 exec, exec
; GFX10_W64-NEXT: global_store_dword v[8:9], v0, off
; GFX10_W64-NEXT: s_endpgm
- %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) convergent readnone willreturn nocallback nofree
store i32 %tmp, ptr addrspace(1) %out
ret void
}
@@ -101,7 +101,7 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
; GFX10_W64-NEXT: s_not_b64 exec, exec
; GFX10_W64-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
; GFX10_W64-NEXT: s_endpgm
- %tmp = call i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64 %active, i64 %inactive) #0
+ %tmp = call i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64 %active, i64 %inactive) convergent readnone willreturn nocallback nofree
store i64 %tmp, ptr addrspace(1) %out
ret void
}
@@ -183,7 +183,7 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
; GFX10_W64-NEXT: v_mov_b32_e32 v2, v1
; GFX10_W64-NEXT: global_store_dword v[8:9], v2, off
; GFX10_W64-NEXT: s_endpgm
- %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) convergent readnone willreturn nocallback nofree
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp, i32 353, i32 15, i32 15, i1 false)
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %dpp)
store i32 %wwm, ptr addrspace(1) %out
@@ -479,7 +479,7 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; DAGISEL10_W64-NEXT: global_store_dword v[41:42], v0, off
; DAGISEL10_W64-NEXT: s_endpgm
call amdgpu_gfx void @gfx_callee(<12 x i32> zeroinitializer)
- %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) convergent readnone willreturn nocallback nofree
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp)
store i32 %wwm, ptr addrspace(1) %out
ret void
@@ -776,16 +776,14 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; DAGISEL10_W64-NEXT: global_store_dword v[41:42], v0, off
; DAGISEL10_W64-NEXT: s_endpgm
call amdgpu_gfx void @gfx_callee(<12 x i32> zeroinitializer)
- %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) convergent readnone willreturn nocallback nofree
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp)
store i32 %wwm, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64, i64) #0
+declare i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32, i32) convergent readnone willreturn nocallback nofree
+declare i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64, i64) convergent readnone willreturn nocallback nofree
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
declare amdgpu_gfx void @gfx_callee(<12 x i32>)
-
-attributes #0 = { convergent readnone willreturn nocallback nofree}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
index 8302af7450ed9d..aea61d3c676f17 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -16,7 +16,7 @@ define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
- %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) convergent readnone
store i32 %tmp, ptr addrspace(1) %out
ret void
}
@@ -31,7 +31,7 @@ define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
- %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) convergent readnone
store i32 %tmp, ptr addrspace(1) %out
ret void
}
@@ -53,7 +53,7 @@ define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT: s_endpgm
- %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
+ %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) convergent readnone
store i64 %tmp, ptr addrspace(1) %out
ret void
}
@@ -69,7 +69,7 @@ define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT: s_endpgm
- %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) #0
+ %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) convergent readnone
store i64 %tmp, ptr addrspace(1) %out
ret void
}
@@ -108,7 +108,7 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
; GCN-NEXT: s_endpgm
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
%cmp = icmp eq i32 %val, 56
- %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) convergent readnone
br i1 %cmp, label %.zero, label %.one
.zero:
@@ -124,8 +124,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
ret void
}
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) convergent readnone
+declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) convergent readnone
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
-
-attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
index 937b8bf94eeae3..bb9c3dbc659a58 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
@@ -1,14 +1,14 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.sffbh.i32(i32) #1
+declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone
; GCN-LABEL: {{^}}s_flbit:
; GCN: s_load_dword [[VAL:s[0-9]+]],
; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
-define amdgpu_kernel void @s_flbit(ptr addrspace(1) noalias %out, i32 %val) #0 {
+define amdgpu_kernel void @s_flbit(ptr addrspace(1) noalias %out, i32 %val) nounwind {
%r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val)
store i32 %r, ptr addrspace(1) %out, align 4
ret void
@@ -18,12 +18,9 @@ define amdgpu_kernel void @s_flbit(ptr addrspace(1) noalias %out, i32 %val) #0 {
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[RESULT]],
-define amdgpu_kernel void @v_flbit(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
+define amdgpu_kernel void @v_flbit(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
%val = load i32, ptr addrspace(1) %valptr, align 4
%r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val)
store i32 %r, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
index 2b61ccad1d6529..a1a3826e987630 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare float @llvm.amdgcn.sin.f32(float) #0
+declare float @llvm.amdgcn.sin.f32(float) nounwind readnone
; GCN-LABEL: {{^}}v_sin_f32:
; GCN: v_sin_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @v_sin_f32(ptr addrspace(1) %out, float %src) #1 {
- %sin = call float @llvm.amdgcn.sin.f32(float %src) #0
+define amdgpu_kernel void @v_sin_f32(ptr addrspace(1) %out, float %src) nounwind {
+ %sin = call float @llvm.amdgcn.sin.f32(float %src) nounwind readnone
store float %sin, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
index 5fb50d7e8589a7..6946aa7662c06f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -279,18 +279,14 @@ END:
ret float %r
}
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare float @llvm.amdgcn.wqm.f32(float) #3
-declare float @llvm.amdgcn.softwqm.f32(float) #3
-declare i32 @llvm.amdgcn.softwqm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wwm.f32(float) #3
-declare float @llvm.amdgcn.wwm.f32(float) #3
-declare void @llvm.amdgcn.wqm.demote(i1) #1
-
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind readnone }
+declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone
+declare float @llvm.amdgcn.softwqm.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.softwqm.i32(i32) nounwind readnone
+declare float @llvm.amdgcn.strict.wwm.f32(float) nounwind readnone
+declare float @llvm.amdgcn.wwm.f32(float) nounwind readnone
+declare void @llvm.amdgcn.wqm.demote(i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
index bbfb88a4b22a36..fb5d801d2d27d2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
@@ -35,7 +35,5 @@ define half @v_fneg_fabs_sqrt_f16(half %src) {
ret half %sqrt
}
-declare half @llvm.amdgcn.sqrt.f16(half) #0
-declare half @llvm.fabs.f16(half) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.amdgcn.sqrt.f16(half) nounwind readnone speculatable willreturn
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
index 0257a3d111421d..0d74fa1c2fc205 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
@@ -70,9 +70,7 @@ define double @v_fneg_fabs_sqrt_f64(double %src) {
ret double %sqrt
}
-declare float @llvm.amdgcn.sqrt.f32(float) #0
-declare double @llvm.amdgcn.sqrt.f64(double) #0
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone speculatable willreturn
+declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
+declare double @llvm.fabs.f64(double) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 0bdb21f767191e..69116d2f7bd928 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -55,7 +55,5 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
ret void
}
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
index a7a2356eda3b92..52eb0addf96070 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
@@ -147,20 +147,18 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
index 00be32b06de058..e3f8755dc352e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
@@ -1042,16 +1042,15 @@ define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace
ret float %fstatus
}
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
index 4c1ae4c228adb3..23150c8418e781 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
@@ -157,8 +157,6 @@ bb:
unreachable
}
; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone willreturn
; Function Attrs: nounwind readonly willreturn
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #1
-attributes #0 = { nounwind readnone willreturn }
-attributes #1 = { nounwind readonly willreturn }
+declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index 5e03748bee08f5..ba8365deed40a5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -272,21 +272,19 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind readonly
+declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) nounwind readonly
-declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #0
+declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
-declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.struct.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.struct.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readonly }
+declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <2 x i16> @llvm.amdgcn.struct.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare <4 x i16> @llvm.amdgcn.struct.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
index 192b01ac085d7f..828e99574417cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
@@ -104,11 +104,8 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.struct.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.format.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
index 94c9f7ab8e756a..b3b15451f431c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
@@ -176,7 +176,7 @@ main_body:
ret void
}
-define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) #0 {
+define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %index) nounwind {
; GFX68-LABEL: buffer_store_x2:
; GFX68: ; %bb.0: ; %main_body
; GFX68-NEXT: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 idxen
@@ -385,20 +385,17 @@ define amdgpu_ps void @struct_buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16
ret void
}
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
index 370859fd07d520..d516f2759d0cf9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
@@ -55,7 +55,5 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_
ret void
}
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind }
+declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
+declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
index fb44515595a557..3ef72efe175cbf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
@@ -147,20 +147,18 @@ main_body:
ret float %out
}
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind }
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare float @llvm.amdgcn.struct.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
index b0bd4e428ef2dd..a5ccb1632f14f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
@@ -835,16 +835,15 @@ define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr ad
ret float %fstatus
}
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-attributes #0 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
index 46b2516f72f8ea..8030ebaf00f920 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
@@ -129,8 +129,6 @@ bb:
unreachable
}
; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone willreturn
; Function Attrs: nounwind readonly willreturn
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-attributes #0 = { nounwind readnone willreturn }
-attributes #1 = { nounwind readonly willreturn }
+declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
index 2f9e6b0a1cf526..387c910dd38754 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
@@ -272,21 +272,19 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #0
+declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind readonly
+declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
-declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #0
+declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readonly }
+declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
index 13217b24dcd4b4..05ba8b2ca4c149 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
@@ -104,11 +104,8 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
index e52af313607640..946495a2ae2b85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
@@ -89,7 +89,7 @@ main_body:
;CHECK-LABEL: {{^}}buffer_store_x2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 idxen
-define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %index) #0 {
+define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %index) nounwind {
main_body:
call void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float> %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
@@ -177,21 +177,17 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4i16(ptr addrspace(8) inreg %rsr
ret void
}
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32) #0
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.struct.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
index f84422efcf9911..2203ebcf3aaf4e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
@@ -501,13 +501,10 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
index 98ed437581e1af..6328a4c1ca7858 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
@@ -616,13 +616,10 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) nounwind
+declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) nounwind
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
index b3a135a35a6526..8504ef6e87a254 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
@@ -99,12 +99,9 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
index 0755dcddd8f46e..c553cb4288418b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
; SI-LABEL: bfe_u32_arg_arg_arg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -33,7 +33,7 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; SI-LABEL: bfe_u32_arg_arg_imm:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -66,7 +66,7 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) nounwind {
; SI-LABEL: bfe_u32_arg_imm_arg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -99,7 +99,7 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0,
ret void
}
-define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) nounwind {
; SI-LABEL: bfe_u32_imm_arg_arg:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -134,7 +134,7 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; SI-LABEL: bfe_u32_arg_0_width_reg_offset:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -159,7 +159,7 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
; SI-LABEL: bfe_u32_arg_0_width_imm_offset:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -184,7 +184,7 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zextload_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -226,7 +226,7 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrsp
}
; FIXME: Should be using s_add_i32
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -272,7 +272,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -318,7 +318,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -366,7 +366,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -414,7 +414,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -462,7 +462,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -508,7 +508,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -550,7 +550,7 @@ define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -577,7 +577,7 @@ define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_3:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -604,7 +604,7 @@ define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -632,7 +632,7 @@ define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_5:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -676,7 +676,7 @@ define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_6:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -721,7 +721,7 @@ define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_7:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -764,7 +764,7 @@ define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -807,7 +807,7 @@ define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_9:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -849,7 +849,7 @@ define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_10:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -891,7 +891,7 @@ define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_11:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -933,7 +933,7 @@ define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_12:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -976,7 +976,7 @@ define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(
}
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_13:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1018,7 +1018,7 @@ define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}
-define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: bfe_u32_test_14:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1044,7 +1044,7 @@ define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1069,7 +1069,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1094,7 +1094,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_3:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1144,7 +1144,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1169,7 +1169,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_5:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1194,7 +1194,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_6:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1219,7 +1219,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_7:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1244,7 +1244,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1269,7 +1269,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_9:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1294,7 +1294,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_10:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1319,7 +1319,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_11:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1344,7 +1344,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_12:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1369,7 +1369,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_13:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1394,7 +1394,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_14:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1419,7 +1419,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_15:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1444,7 +1444,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1469,7 +1469,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_17:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1494,7 +1494,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out)
ret void
}
-define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) nounwind {
; SI-LABEL: bfe_u32_constant_fold_test_18:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1569,7 +1569,7 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0
; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
ptr addrspace(1) %out1,
- ptr addrspace(1) %in) #0 {
+ ptr addrspace(1) %in) nounwind {
%src = load i32, ptr addrspace(1) %in, align 4
%and = and i32 %src, 63
%bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
@@ -1578,7 +1578,7 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0
ret void
}
-define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: lshr_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1608,7 +1608,7 @@ define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: v_lshr_and:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1642,7 +1642,7 @@ define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0
ret void
}
-define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: and_lshr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1672,7 +1672,7 @@ define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: and_lshr2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1702,7 +1702,7 @@ define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
+define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) nounwind {
; SI-LABEL: shl_lshr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1732,7 +1732,4 @@ define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
ret void
}
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
index e43daf46e1e060..ba5fa1dd142380 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -11,7 +11,7 @@
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) nounwind readnone convergent
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -24,7 +24,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_bc(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
- %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 true) #0
+ %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 true) nounwind readnone convergent
store i32 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -63,7 +63,7 @@ define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i6
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) nounwind readnone convergent
store i64 %tmp0, ptr addrspace(1) %gep
ret void
}
@@ -83,7 +83,7 @@ define amdgpu_kernel void @update_dpp64_imm_old_test(ptr addrspace(1) %arg, i64
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
%load = load i64, ptr addrspace(1) %gep
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 123451234512345, i64 %load, i32 1, i32 1, i32 1, i1 false) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 123451234512345, i64 %load, i32 1, i32 1, i32 1, i1 false) nounwind readnone convergent
store i64 %tmp0, ptr addrspace(1) %gep
ret void
}
@@ -98,7 +98,7 @@ define amdgpu_kernel void @update_dpp64_imm_old_test(ptr addrspace(1) %arg, i64
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dpp64_imm_src_test(ptr addrspace(1) %out, i64 %in1) {
- %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 123451234512345, i32 1, i32 1, i32 1, i1 false) #0
+ %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 123451234512345, i32 1, i32 1, i32 1, i1 false) nounwind readnone convergent
store i64 %tmp0, ptr addrspace(1) %out
ret void
}
@@ -457,10 +457,8 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x
declare i32 @llvm.amdgcn.workitem.id.x()
declare void @llvm.amdgcn.s.barrier()
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
-declare <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16>, <2 x i16>, i32, i32, i32, i1) #0
-declare <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half>, <2 x half>, i32, i32, i32, i1) #0
-declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
-
-attributes #0 = { nounwind readnone convergent }
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) nounwind readnone convergent
+declare <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16>, <2 x i16>, i32, i32, i32, i1) nounwind readnone convergent
+declare <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half>, <2 x half>, i32, i32, i32, i1) nounwind readnone convergent
+declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) nounwind readnone convergent
+declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) nounwind readnone convergent
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
index 4b4bdfe118d0d5..deceac9f2755ea 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
@@ -4,9 +4,9 @@
; GCN-DAG: ; wave barrier
; GCN-NOT: s_barrier
-define amdgpu_kernel void @test_wave_barrier() #0 {
+define amdgpu_kernel void @test_wave_barrier() nounwind {
entry:
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
@@ -16,13 +16,10 @@ entry:
; GCN-LABEL: {{^}}test_wave_barrier_is_not_isBarrier:
; GCN-DAG: ; wave barrier
; GCN-NOT: s_barrier
-define amdgpu_kernel void @test_wave_barrier_is_not_isBarrier() #0 {
+define amdgpu_kernel void @test_wave_barrier_is_not_isBarrier() nounwind {
entry:
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
unreachable
}
-declare void @llvm.amdgcn.wave.barrier() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.wave.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125e..8b9d198bdc6933 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -28,7 +28,7 @@
define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) {
bb:
- %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() nounwind readnone speculatable
store i32 %tmp, ptr addrspace(1) %arg, align 4
ret void
}
@@ -49,7 +49,7 @@ bb:
define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) {
bb:
- %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() nounwind readnone speculatable
%tmp1 = icmp ugt i32 %tmp, 32
%tmp2 = select i1 %tmp1, i32 2, i32 1
store i32 %tmp2, ptr addrspace(1) %arg
@@ -67,7 +67,7 @@ bb:
define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {
bb:
- %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
+ %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() nounwind readnone speculatable
%tmp1 = icmp ugt i32 %tmp, 32
br i1 %tmp1, label %bb2, label %bb3
@@ -79,6 +79,4 @@ bb3: ; preds = %bb2, %bb
ret void
}
-declare i32 @llvm.amdgcn.wavefrontsize() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.wavefrontsize() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
index ab29ca4a997348..b84906c91adecf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
@@ -3,9 +3,9 @@
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone
; ALL-LABEL: {{^}}test_workgroup_id_x:
@@ -32,7 +32,7 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -59,7 +59,7 @@ define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 {
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -94,14 +94,11 @@ define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 {
; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
-define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index a1835ea176d5b6..7ac7ab427a4f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,9 +5,9 @@
; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone
; MESA: .section .AMDGPU.config
; MESA: .long 47180
@@ -20,7 +20,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0
-define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -38,7 +38,7 @@ define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1
-define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -56,7 +56,7 @@ define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2
-define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %id, ptr addrspace(1) %out
ret void
@@ -127,9 +127,6 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!3}
!0 = !{i32 64, i32 1, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
index 2e47cc505ee692..3889833baa6b87 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -69,7 +69,7 @@ define amdgpu_ps void @static_exact(float %arg0, float %arg1) {
%c1 = fcmp oge float %arg1, 0.0
call void @llvm.amdgcn.wqm.demote(i1 false)
%tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -150,7 +150,7 @@ define amdgpu_ps void @dynamic_exact(float %arg0, float %arg1) {
%c1 = fcmp oge float %arg1, 0.0
call void @llvm.amdgcn.wqm.demote(i1 %c1)
%tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -276,7 +276,7 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
.continue:
%tmp1 = select i1 %c2, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
@@ -406,11 +406,11 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
br label %.continue
.continue:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -532,7 +532,7 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB4_5:
.entry:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%z.cmp = fcmp olt float %tex0, 0.0
@@ -544,7 +544,7 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
.continue:
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -646,13 +646,13 @@ define amdgpu_ps <4 x float> @wqm_demote_dynamic(<8 x i32> inreg %rsrc, <4 x i32
; GFX10-64-NEXT: s_endpgm
; GFX10-64-NEXT: .LBB5_3:
.entry:
- %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
%tex0 = extractelement <4 x float> %tex, i32 0
%tex1 = extractelement <4 x float> %tex, i32 0
%z.cmp = fcmp olt float %tex0, 0.0
call void @llvm.amdgcn.wqm.demote(i1 %z.cmp)
%coord1 = fadd float %tex0, %tex1
- %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
+ %rtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %coord1, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) nounwind
ret <4 x float> %rtex
}
@@ -849,8 +849,8 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
.entry:
%p0 = extractelement <2 x float> %input, i32 0
%p1 = extractelement <2 x float> %input, i32 1
- %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) #2
- %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) #2
+ %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
+ %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
%argi = fptosi float %arg to i32
%cond0 = icmp eq i32 %argi, 0
br i1 %cond0, label %.continue0, label %.demote0
@@ -877,7 +877,7 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
br label %.continue1
.continue1:
- call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) #3
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) inaccessiblememonly nounwind
ret void
}
@@ -1142,8 +1142,8 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
.entry:
%p0 = extractelement <2 x float> %input, i32 0
%p1 = extractelement <2 x float> %input, i32 1
- %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) #2
- %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) #2
+ %x0 = call float @llvm.amdgcn.interp.p1(float %p0, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
+ %x1 = call float @llvm.amdgcn.interp.p2(float %x0, float %p1, i32 immarg 0, i32 immarg 0, i32 %index) nounwind readnone speculatable
%argi = fptosi float %arg to i32
%cond0 = icmp eq i32 %argi, 0
br i1 %cond0, label %.continue0, label %.demote0
@@ -1176,7 +1176,7 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
br i1 %loop.cond, label %.continue0, label %.return
.return:
- call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) #3
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> <half 0xH3C00, half 0xH0000>, <2 x half> <half 0xH0000, half 0xH3C00>, i1 immarg true, i1 immarg true) inaccessiblememonly nounwind
ret void
}
@@ -1213,23 +1213,17 @@ define amdgpu_ps void @static_exact_nop(float %arg0, float %arg1) {
%c1 = fcmp oge float %arg1, 0.0
call void @llvm.amdgcn.wqm.demote(i1 true)
%tmp1 = select i1 %c0, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) nounwind
ret void
}
-declare void @llvm.amdgcn.wqm.demote(i1) #0
-declare i1 @llvm.amdgcn.live.mask() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #4
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { inaccessiblememonly nounwind }
-attributes #4 = { convergent nounwind readnone }
+declare void @llvm.amdgcn.wqm.demote(i1) nounwind
+declare i1 @llvm.amdgcn.live.mask() nounwind
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) nounwind readnone speculatable
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) inaccessiblememonly nounwind
+declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) convergent nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
index a4da690b2af1f2..f82a0dad5dcc0f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
@@ -10,7 +10,7 @@
;WAVE64: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
;WAVE32: s_wqm_b32 [[WQM:[^,]+]], [[CMP]]
;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]]
-define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 {
+define amdgpu_ps float @ret(i32 %v0, i32 %v1) nounwind {
main_body:
%c = icmp eq i32 %v0, %v1
%w = call i1 @llvm.amdgcn.wqm.vote(i1 %c)
@@ -21,7 +21,7 @@ main_body:
;CHECK-LABEL: {{^}}true:
;WAVE64: s_wqm_b64
;WAVE32: s_wqm_b32
-define amdgpu_ps float @true() #1 {
+define amdgpu_ps float @true() nounwind {
main_body:
%w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
%r = select i1 %w, float 1.0, float 0.0
@@ -31,7 +31,7 @@ main_body:
;CHECK-LABEL: {{^}}false:
;WAVE64: s_wqm_b64
;WAVE32: s_wqm_b32
-define amdgpu_ps float @false() #1 {
+define amdgpu_ps float @false() nounwind {
main_body:
%w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
%r = select i1 %w, float 1.0, float 0.0
@@ -53,7 +53,7 @@ main_body:
;WAVE32: s_and_b32 exec_lo, exec_lo, [[MASK]]
;CHECK: s_endpgm
-define amdgpu_ps float @kill(i32 %v0, i32 %v1) #1 {
+define amdgpu_ps float @kill(i32 %v0, i32 %v1) nounwind {
main_body:
%c = icmp eq i32 %v0, %v1
%w = call i1 @llvm.amdgcn.wqm.vote(i1 %c)
@@ -61,7 +61,5 @@ main_body:
ret float 0.0
}
-declare void @llvm.amdgcn.kill(i1) #1
+declare void @llvm.amdgcn.kill(i1) nounwind
declare i1 @llvm.amdgcn.wqm.vote(i1)
-
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index 37951669dbe755..bb57ef2a62b91a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -3,12 +3,12 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
-declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.writelane(i32, i32, i32) nounwind readnone convergent
; CHECK-LABEL: {{^}}test_writelane_sreg:
; CIGFX9: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, m0
; GFX10: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_writelane_sreg(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
+define amdgpu_kernel void @test_writelane_sreg(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%oldval = load i32, ptr addrspace(1) %out
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
store i32 %writelane, ptr addrspace(1) %out, align 4
@@ -17,7 +17,7 @@ define amdgpu_kernel void @test_writelane_sreg(ptr addrspace(1) %out, i32 %src0,
; CHECK-LABEL: {{^}}test_writelane_imm_sreg:
; CHECK: v_writelane_b32 v{{[0-9]+}}, 32, s{{[0-9]+}}
-define amdgpu_kernel void @test_writelane_imm_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_writelane_imm_sreg(ptr addrspace(1) %out, i32 %src1) nounwind {
%oldval = load i32, ptr addrspace(1) %out
%writelane = call i32 @llvm.amdgcn.writelane(i32 32, i32 %src1, i32 %oldval)
store i32 %writelane, ptr addrspace(1) %out, align 4
@@ -27,7 +27,7 @@ define amdgpu_kernel void @test_writelane_imm_sreg(ptr addrspace(1) %out, i32 %s
; CHECK-LABEL: {{^}}test_writelane_vreg_lane:
; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
; CHECK: v_writelane_b32 v{{[0-9]+}}, 12, [[LANE]]
-define amdgpu_kernel void @test_writelane_vreg_lane(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @test_writelane_vreg_lane(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
%args = load <2 x i32>, ptr addrspace(1) %gep.in
@@ -43,7 +43,7 @@ define amdgpu_kernel void @test_writelane_vreg_lane(ptr addrspace(1) %out, ptr a
; CIGFX9: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
; CIGFX9: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], m0
; GFX10: v_writelane_b32 v{{[0-9]+}}, m0, s{{[0-9]+}}
-define amdgpu_kernel void @test_writelane_m0_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_writelane_m0_sreg(ptr addrspace(1) %out, i32 %src1) nounwind {
%oldval = load i32, ptr addrspace(1) %out
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %src1, i32 %oldval)
@@ -53,9 +53,9 @@ define amdgpu_kernel void @test_writelane_m0_sreg(ptr addrspace(1) %out, i32 %sr
; CHECK-LABEL: {{^}}test_writelane_imm:
; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 32
-define amdgpu_kernel void @test_writelane_imm(ptr addrspace(1) %out, i32 %src0) #1 {
+define amdgpu_kernel void @test_writelane_imm(ptr addrspace(1) %out, i32 %src0) nounwind {
%oldval = load i32, ptr addrspace(1) %out
- %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 32, i32 %oldval) #0
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 32, i32 %oldval) nounwind readnone convergent
store i32 %writelane, ptr addrspace(1) %out, align 4
ret void
}
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_writelane_imm(ptr addrspace(1) %out, i32 %src0)
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], s{{[0-9]+}}
; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
+define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
store i32 %writelane, ptr addrspace(1) %out, align 4
ret void
@@ -74,14 +74,10 @@ define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, ptr add
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], 42
; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @test_writelane_imm_oldval(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
+define amdgpu_kernel void @test_writelane_imm_oldval(ptr addrspace(1) %out, i32 %src0, i32 %src1) nounwind {
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 42)
store i32 %writelane, ptr addrspace(1) %out, align 4
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index 6c8fccd54b81bd..061986e86d335d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -12,7 +12,7 @@
; GCN: flat_store_dword
; GCN: s_endpgm
-define amdgpu_kernel void @test_debug_value(ptr addrspace(1) nocapture %globalptr_arg) #0 !dbg !4 {
+define amdgpu_kernel void @test_debug_value(ptr addrspace(1) nocapture %globalptr_arg) nounwind !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata ptr addrspace(1) %globalptr_arg, metadata !10, metadata !13), !dbg !14
store i32 123, ptr addrspace(1) %globalptr_arg, align 4, !dbg !14
@@ -27,16 +27,13 @@ entry:
; NOOPT-NEXT: s_endpgm
; OPT: s_endpgm
-define amdgpu_kernel void @only_undef_dbg_value() #1 {
+define amdgpu_kernel void @only_undef_dbg_value() nounwind readnone {
bb:
call void @llvm.dbg.value(metadata <4 x float> undef, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14
ret void, !dbg !14
}
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !12, !15}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 7a0450761e1f11..63cc69673db04f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -3574,7 +3574,7 @@ define float @v_exp_f32_afn(float %in) {
ret float %result
}
-define float @v_exp_f32_afn_daz(float %in) #0 {
+define float @v_exp_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp_f32_afn_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3602,7 +3602,7 @@ define float @v_exp_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_exp_f32_afn_dynamic(float %in) #1 {
+define float @v_exp_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3739,7 +3739,7 @@ define float @v_fabs_exp_f32_afn(float %in) {
ret float %result
}
-define float @v_exp_f32_daz(float %in) #0 {
+define float @v_exp_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4055,7 +4055,7 @@ define float @v_exp_f32_nnan(float %in) {
ret float %result
}
-define float @v_exp_f32_nnan_daz(float %in) #0 {
+define float @v_exp_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_nnan_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4213,7 +4213,7 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_exp_f32_nnan_dynamic(float %in) #1 {
+define float @v_exp_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4371,7 +4371,7 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp_f32_ninf_daz(float %in) #0 {
+define float @v_exp_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_ninf_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4505,7 +4505,7 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp_f32_ninf_dynamic(float %in) #1 {
+define float @v_exp_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4773,7 +4773,7 @@ define float @v_exp_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_exp_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4907,7 +4907,7 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_exp_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5041,7 +5041,7 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp_f32_fast_daz(float %in) #0 {
+define float @v_exp_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp_f32_fast_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5069,7 +5069,7 @@ define float @v_exp_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_exp_f32_dynamic_mode(float %in) #1 {
+define float @v_exp_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp_f32_dynamic_mode:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5870,7 +5870,7 @@ define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
ret float %result
}
-define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
+define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7159,7 +7159,7 @@ define float @v_exp_f32_contract(float %in) {
ret float %result
}
-define float @v_exp_f32_contract_daz(float %in) #0 {
+define float @v_exp_f32_contract_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp_f32_contract_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7451,17 +7451,13 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
ret float %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp.f32(float) #2
-declare <2 x float> @llvm.exp.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp.f16(half) #2
-declare <2 x half> @llvm.exp.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp.v3f16(<3 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.exp.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.exp.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.exp.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.exp.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.exp.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.exp.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 544c1de6c7bb77..cf10322029aa39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -3600,7 +3600,7 @@ define float @v_exp10_f32_afn(float %in) {
ret float %result
}
-define float @v_exp10_f32_afn_daz(float %in) #0 {
+define float @v_exp10_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-SDAG-LABEL: v_exp10_f32_afn_daz:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3648,7 +3648,7 @@ define float @v_exp10_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_exp10_f32_afn_dynamic(float %in) #1 {
+define float @v_exp10_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp10_f32_afn_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3797,7 +3797,7 @@ define float @v_fabs_exp10_f32_afn(float %in) {
ret float %result
}
-define float @v_exp10_f32_daz(float %in) #0 {
+define float @v_exp10_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4113,7 +4113,7 @@ define float @v_exp10_f32_nnan(float %in) {
ret float %result
}
-define float @v_exp10_f32_nnan_daz(float %in) #0 {
+define float @v_exp10_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_nnan_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4271,7 +4271,7 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
+define float @v_exp10_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp10_f32_nnan_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4429,7 +4429,7 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp10_f32_ninf_daz(float %in) #0 {
+define float @v_exp10_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_ninf_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4563,7 +4563,7 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp10_f32_ninf_dynamic(float %in) #1 {
+define float @v_exp10_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp10_f32_ninf_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4831,7 +4831,7 @@ define float @v_exp10_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_exp10_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4965,7 +4965,7 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_exp10_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5099,7 +5099,7 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp10_f32_fast_daz(float %in) #0 {
+define float @v_exp10_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-SDAG-LABEL: v_exp10_f32_fast_daz:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5147,7 +5147,7 @@ define float @v_exp10_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_exp10_f32_dynamic_mode(float %in) #1 {
+define float @v_exp10_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; VI-SDAG-LABEL: v_exp10_f32_dynamic_mode:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5963,7 +5963,7 @@ define float @v_exp10_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) {
ret float %result
}
-define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
+define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7253,7 +7253,7 @@ define float @v_exp10_f32_contract(float %in) {
ret float %result
}
-define float @v_exp10_f32_contract_daz(float %in) #0 {
+define float @v_exp10_f32_contract_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; VI-SDAG-LABEL: v_exp10_f32_contract_daz:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7545,17 +7545,13 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
ret float %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp10.f32(float) #2
-declare <2 x float> @llvm.exp10.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp10.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp10.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp10.f16(half) #2
-declare <2 x half> @llvm.exp10.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp10.v3f16(<3 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.exp10.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.exp10.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.exp10.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.exp10.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.exp10.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.exp10.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index 36e78975cdb015..0fb915b11609c1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -1353,7 +1353,7 @@ define float @v_exp2_f32_afn(float %in) {
ret float %result
}
-define float @v_exp2_f32_afn_daz(float %in) #0 {
+define float @v_exp2_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_afn_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1373,7 +1373,7 @@ define float @v_exp2_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_exp2_f32_afn_dynamic(float %in) #1 {
+define float @v_exp2_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp2_f32_afn_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1458,7 +1458,7 @@ define float @v_fabs_exp2_f32_afn(float %in) {
ret float %result
}
-define float @v_exp2_f32_daz(float %in) #0 {
+define float @v_exp2_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1520,7 +1520,7 @@ define float @v_exp2_f32_nnan(float %in) {
ret float %result
}
-define float @v_exp2_f32_nnan_daz(float %in) #0 {
+define float @v_exp2_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_nnan_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1540,7 +1540,7 @@ define float @v_exp2_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
+define float @v_exp2_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp2_f32_nnan_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1582,7 +1582,7 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp2_f32_ninf_daz(float %in) #0 {
+define float @v_exp2_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_ninf_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1602,7 +1602,7 @@ define float @v_exp2_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp2_f32_ninf_dynamic(float %in) #1 {
+define float @v_exp2_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp2_f32_ninf_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1686,7 +1686,7 @@ define float @v_exp2_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_exp2_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_exp2_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_nnan_ninf_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1706,7 +1706,7 @@ define float @v_exp2_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_exp2_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1748,7 +1748,7 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_exp2_f32_fast_daz(float %in) #0 {
+define float @v_exp2_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_fast_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1768,7 +1768,7 @@ define float @v_exp2_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_exp2_f32_dynamic_mode(float %in) #1 {
+define float @v_exp2_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GCN-SDAG-LABEL: v_exp2_f32_dynamic_mode:
; GCN-SDAG: ; %bb.0:
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2890,7 +2890,7 @@ define float @v_exp2_f32_contract(float %in) {
ret float %result
}
-define float @v_exp2_f32_contract_daz(float %in) #0 {
+define float @v_exp2_f32_contract_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GCN-LABEL: v_exp2_f32_contract_daz:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2952,17 +2952,13 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) {
ret float %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp2.f32(float) #2
-declare <2 x float> @llvm.exp2.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp2.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp2.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp2.f16(half) #2
-declare <2 x half> @llvm.exp2.v2f16(<2 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.exp2.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.exp2.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.exp2.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.exp2.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.exp2.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.exp2.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
index 14742c5827c1e4..33e8d636bbd59f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
@@ -1222,19 +1222,17 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
ret <2 x i32> %result.1
}
-declare { float, i32 } @llvm.frexp.f32.i32(float) #0
-declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) #0
+declare { float, i32 } @llvm.frexp.f32.i32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare { half, i32 } @llvm.frexp.f16.i32(half) #0
-declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) #0
+declare { half, i32 } @llvm.frexp.f16.i32(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare { double, i32 } @llvm.frexp.f64.i32(double) #0
-declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) #0
+declare { double, i32 } @llvm.frexp.f64.i32(double) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare { half, i16 } @llvm.frexp.f16.i16(half) #0
-declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare { half, i16 } @llvm.frexp.f16.i16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX11-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index ea823f30f26c22..614406db3a2fe0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -2068,7 +2068,7 @@ entry:
ret i1 %0
}
-define i1 @iszero_or_nan_f_daz(bfloat %x) #0 {
+define i1 @iszero_or_nan_f_daz(bfloat %x) "denormal-fp-math"="ieee,preserve-sign" {
; GFX7CHECK-LABEL: iszero_or_nan_f_daz:
; GFX7CHECK: ; %bb.0: ; %entry
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2127,7 +2127,7 @@ entry:
ret i1 %0
}
-define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
+define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) "denormal-fp-math"="ieee,dynamic" {
; GFX7CHECK-LABEL: iszero_or_nan_f_maybe_daz:
; GFX7CHECK: ; %bb.0: ; %entry
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2245,7 +2245,7 @@ entry:
ret i1 %0
}
-define i1 @not_iszero_or_nan_f_daz(bfloat %x) #0 {
+define i1 @not_iszero_or_nan_f_daz(bfloat %x) "denormal-fp-math"="ieee,preserve-sign" {
; GFX7CHECK-LABEL: not_iszero_or_nan_f_daz:
; GFX7CHECK: ; %bb.0: ; %entry
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2304,7 +2304,7 @@ entry:
ret i1 %0
}
-define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
+define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) "denormal-fp-math"="ieee,dynamic" {
; GFX7CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX7CHECK: ; %bb.0: ; %entry
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2904,10 +2904,8 @@ declare <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat>, i32)
declare <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat>, i32)
; Assume DAZ
-attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
; Maybe daz
-attributes #1 = { "denormal-fp-math"="ieee,dynamic" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10SELDAG: {{.*}}
; GFX11SELDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index ec3c08ec795235..c76888d6160f53 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -2386,7 +2386,7 @@ entry:
ret i1 %0
}
-define i1 @iszero_or_nan_f_daz(half %x) #0 {
+define i1 @iszero_or_nan_f_daz(half %x) "denormal-fp-math"="ieee,preserve-sign" {
; GFX7SELDAG-LABEL: iszero_or_nan_f_daz:
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2444,7 +2444,7 @@ entry:
ret i1 %0
}
-define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
+define i1 @iszero_or_nan_f_maybe_daz(half %x) "denormal-fp-math"="ieee,dynamic" {
; GFX7SELDAG-LABEL: iszero_or_nan_f_maybe_daz:
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2569,7 +2569,7 @@ entry:
ret i1 %0
}
-define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
+define i1 @not_iszero_or_nan_f_daz(half %x) "denormal-fp-math"="ieee,preserve-sign" {
; GFX7SELDAG-LABEL: not_iszero_or_nan_f_daz:
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2636,7 +2636,7 @@ entry:
ret i1 %0
}
-define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
+define i1 @not_iszero_or_nan_f_maybe_daz(half %x) "denormal-fp-math"="ieee,dynamic" {
; GFX7SELDAG-LABEL: not_iszero_or_nan_f_maybe_daz:
; GFX7SELDAG: ; %bb.0: ; %entry
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3213,7 +3213,5 @@ declare <3 x i1> @llvm.is.fpclass.v3f16(<3 x half>, i32)
declare <4 x i1> @llvm.is.fpclass.v4f16(<4 x half>, i32)
; Assume DAZ
-attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
; Maybe daz
-attributes #1 = { "denormal-fp-math"="ieee,dynamic" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index dc7fe840f51180..39fa2dabe9e3a3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -565,20 +565,18 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
ret <2 x half> %result
}
-declare float @llvm.ldexp.f32.i32(float, i32) #0
-declare float @llvm.ldexp.f32.i16(float, i16) #0
-declare float @llvm.ldexp.f32.i64(float, i64) #0
-declare half @llvm.ldexp.f16.i8(half, i8) #0
-declare half @llvm.ldexp.f16.i16(half, i16) #0
-declare half @llvm.ldexp.f16.i32(half, i32) #0
-declare <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>) #0
-declare <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float>, <2 x i64>) #0
-declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>) #0
-declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
-declare double @llvm.ldexp.f64.i32(double, i32) #0
-declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0
-
-attributes #0 = { nounwind readnone }
+declare float @llvm.ldexp.f32.i32(float, i32) nounwind readnone
+declare float @llvm.ldexp.f32.i16(float, i16) nounwind readnone
+declare float @llvm.ldexp.f32.i64(float, i64) nounwind readnone
+declare half @llvm.ldexp.f16.i8(half, i8) nounwind readnone
+declare half @llvm.ldexp.f16.i16(half, i16) nounwind readnone
+declare half @llvm.ldexp.f16.i32(half, i32) nounwind readnone
+declare <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>) nounwind readnone
+declare <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>) nounwind readnone
+declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) nounwind readnone
+declare <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>) nounwind readnone
+declare <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float>, <2 x i64>) nounwind readnone
+declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>) nounwind readnone
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) nounwind readnone
+declare double @llvm.ldexp.f64.i32(double, i32) nounwind readnone
+declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index d847af780acab3..fbc81531b16039 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -3562,7 +3562,7 @@ define float @v_log_f32_afn(float %in) {
ret float %result
}
-define float @v_log_f32_afn_daz(float %in) #0 {
+define float @v_log_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log_f32_afn_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3591,7 +3591,7 @@ define float @v_log_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_log_f32_afn_dynamic(float %in) #1 {
+define float @v_log_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log_f32_afn_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3830,7 +3830,7 @@ define float @v_fabs_log_f32_afn(float %in) {
ret float %result
}
-define float @v_log_f32_daz(float %in) #0 {
+define float @v_log_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log_f32_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4172,7 +4172,7 @@ define float @v_log_f32_nnan(float %in) {
ret float %result
}
-define float @v_log_f32_nnan_daz(float %in) #0 {
+define float @v_log_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log_f32_nnan_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4312,7 +4312,7 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_log_f32_nnan_dynamic(float %in) #1 {
+define float @v_log_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log_f32_nnan_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4514,7 +4514,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log_f32_ninf_daz(float %in) #0 {
+define float @v_log_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log_f32_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4654,7 +4654,7 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log_f32_ninf_dynamic(float %in) #1 {
+define float @v_log_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log_f32_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5034,7 +5034,7 @@ define float @v_log_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_log_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log_f32_nnan_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5137,7 +5137,7 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_log_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log_f32_nnan_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5315,7 +5315,7 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log_f32_fast_daz(float %in) #0 {
+define float @v_log_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log_f32_fast_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5344,7 +5344,7 @@ define float @v_log_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_log_f32_dynamic_mode(float %in) #1 {
+define float @v_log_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log_f32_dynamic_mode:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7682,21 +7682,17 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) {
ret <4 x half> %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log.f32(float) #2
-declare <2 x float> @llvm.log.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log.f16(half) #2
-declare <2 x half> @llvm.log.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.log.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.log.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.log.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.log.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.log.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.log.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @llvm.log.v4f16(<4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX689-GISEL: {{.*}}
; GFX689-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 3f060de9f6596d..e2e01c4e1b68a7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -3562,7 +3562,7 @@ define float @v_log10_f32_afn(float %in) {
ret float %result
}
-define float @v_log10_f32_afn_daz(float %in) #0 {
+define float @v_log10_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log10_f32_afn_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3591,7 +3591,7 @@ define float @v_log10_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_log10_f32_afn_dynamic(float %in) #1 {
+define float @v_log10_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log10_f32_afn_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3830,7 +3830,7 @@ define float @v_fabs_log10_f32_afn(float %in) {
ret float %result
}
-define float @v_log10_f32_daz(float %in) #0 {
+define float @v_log10_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log10_f32_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4172,7 +4172,7 @@ define float @v_log10_f32_nnan(float %in) {
ret float %result
}
-define float @v_log10_f32_nnan_daz(float %in) #0 {
+define float @v_log10_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log10_f32_nnan_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4312,7 +4312,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_log10_f32_nnan_dynamic(float %in) #1 {
+define float @v_log10_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log10_f32_nnan_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4514,7 +4514,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log10_f32_ninf_daz(float %in) #0 {
+define float @v_log10_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log10_f32_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -4654,7 +4654,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log10_f32_ninf_dynamic(float %in) #1 {
+define float @v_log10_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log10_f32_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5034,7 +5034,7 @@ define float @v_log10_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_log10_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_daz:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5137,7 +5137,7 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_log10_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log10_f32_nnan_ninf_dynamic:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5315,7 +5315,7 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log10_f32_fast_daz(float %in) #0 {
+define float @v_log10_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log10_f32_fast_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5344,7 +5344,7 @@ define float @v_log10_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_log10_f32_dynamic_mode(float %in) #1 {
+define float @v_log10_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; SI-SDAG-LABEL: v_log10_f32_dynamic_mode:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -7682,21 +7682,17 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) {
ret <4 x half> %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log10.f32(float) #2
-declare <2 x float> @llvm.log10.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log10.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log10.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log10.f16(half) #2
-declare <2 x half> @llvm.log10.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log10.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log10.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.log10.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.log10.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.log10.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.log10.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.log10.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.log10.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @llvm.log10.v4f16(<4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX689-GISEL: {{.*}}
; GFX689-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 035b2439eff153..d9c4ef8fe497e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -1826,7 +1826,7 @@ define float @v_log2_f32_afn(float %in) {
ret float %result
}
-define float @v_log2_f32_afn_daz(float %in) #0 {
+define float @v_log2_f32_afn_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_afn_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1852,7 +1852,7 @@ define float @v_log2_f32_afn_daz(float %in) #0 {
ret float %result
}
-define float @v_log2_f32_afn_dynamic(float %in) #1 {
+define float @v_log2_f32_afn_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1991,7 +1991,7 @@ define float @v_fabs_log2_f32_afn(float %in) {
ret float %result
}
-define float @v_log2_f32_daz(float %in) #0 {
+define float @v_log2_f32_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2085,7 +2085,7 @@ define float @v_log2_f32_nnan(float %in) {
ret float %result
}
-define float @v_log2_f32_nnan_daz(float %in) #0 {
+define float @v_log2_f32_nnan_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_nnan_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2111,7 +2111,7 @@ define float @v_log2_f32_nnan_daz(float %in) #0 {
ret float %result
}
-define float @v_log2_f32_nnan_dynamic(float %in) #1 {
+define float @v_log2_f32_nnan_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX689-SDAG-LABEL: v_log2_f32_nnan_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2179,7 +2179,7 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log2_f32_ninf_daz(float %in) #0 {
+define float @v_log2_f32_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_ninf_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2205,7 +2205,7 @@ define float @v_log2_f32_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log2_f32_ninf_dynamic(float %in) #1 {
+define float @v_log2_f32_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX689-SDAG-LABEL: v_log2_f32_ninf_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2341,7 +2341,7 @@ define float @v_log2_f32_nnan_ninf(float %in) {
ret float %result
}
-define float @v_log2_f32_nnan_ninf_daz(float %in) #0 {
+define float @v_log2_f32_nnan_ninf_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_nnan_ninf_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2367,7 +2367,7 @@ define float @v_log2_f32_nnan_ninf_daz(float %in) #0 {
ret float %result
}
-define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
+define float @v_log2_f32_nnan_ninf_dynamic(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX689-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2435,7 +2435,7 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 {
ret float %result
}
-define float @v_log2_f32_fast_daz(float %in) #0 {
+define float @v_log2_f32_fast_daz(float %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
; GFX689-LABEL: v_log2_f32_fast_daz:
; GFX689: ; %bb.0:
; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2461,7 +2461,7 @@ define float @v_log2_f32_fast_daz(float %in) #0 {
ret float %result
}
-define float @v_log2_f32_dynamic_mode(float %in) #1 {
+define float @v_log2_f32_dynamic_mode(float %in) "denormal-fp-math-f32"="dynamic,dynamic" {
; GFX689-SDAG-LABEL: v_log2_f32_dynamic_mode:
; GFX689-SDAG: ; %bb.0:
; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3990,18 +3990,14 @@ define <4 x half> @v_log2_v4f16_fast(<4 x half> %in) {
ret <4 x half> %result
}
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log2.f32(float) #2
-declare <2 x float> @llvm.log2.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log2.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log2.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log2.f16(half) #2
-declare <2 x half> @llvm.log2.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log2.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log2.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
-attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare float @llvm.fabs.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare float @llvm.log2.f32(float) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x float> @llvm.log2.v2f32(<2 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x float> @llvm.log2.v3f32(<3 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.log2.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.log2.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <3 x half> @llvm.log2.v3f16(<3 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x half> @llvm.log2.v4f16(<4 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index d056a97dc54442..59d1999160b60e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -143,7 +143,7 @@ define amdgpu_kernel void @maxnum_f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
@@ -252,7 +252,7 @@ define amdgpu_kernel void @maxnum_f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.maxnum.f16(half 3.0, half %b.val)
@@ -360,7 +360,7 @@ define amdgpu_kernel void @maxnum_f16_imm_b(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.maxnum.f16(half %a.val, half 4.0)
@@ -474,7 +474,7 @@ define amdgpu_kernel void @maxnum_v2f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -565,7 +565,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -655,7 +655,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -792,7 +792,7 @@ define amdgpu_kernel void @maxnum_v3f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <3 x half>, ptr addrspace(1) %a
%b.val = load <3 x half>, ptr addrspace(1) %b
@@ -939,7 +939,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <4 x half>, ptr addrspace(1) %a
%b.val = load <4 x half>, ptr addrspace(1) %b
@@ -1057,12 +1057,10 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.maxnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index f934a2de9247f0..d83d8e5d0eac97 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -142,7 +142,7 @@ define amdgpu_kernel void @minnum_f16_ieee(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
@@ -151,7 +151,7 @@ entry:
ret void
}
-define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 {
+define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: minnum_f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
@@ -278,7 +278,7 @@ define amdgpu_kernel void @minnum_f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.minnum.f16(half 3.0, half %b.val)
@@ -385,7 +385,7 @@ define amdgpu_kernel void @minnum_f16_imm_b(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.minnum.f16(half %a.val, half 4.0)
@@ -498,7 +498,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -507,7 +507,7 @@ entry:
ret void
}
-define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) #0 {
+define amdgpu_ps <2 x half> @minnum_v2f16_no_ieee(<2 x half> %a, <2 x half> %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SI-LABEL: minnum_v2f16_no_ieee:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
@@ -623,7 +623,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load <2 x half>, ptr addrspace(1) %b
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 3.0, half 4.0>, <2 x half> %b.val)
@@ -712,7 +712,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%r.val = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a.val, <2 x half> <half 4.0, half 3.0>)
@@ -848,7 +848,7 @@ define amdgpu_kernel void @minnum_v3f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <3 x half>, ptr addrspace(1) %a
%b.val = load <3 x half>, ptr addrspace(1) %b
@@ -994,7 +994,7 @@ define amdgpu_kernel void @minnum_v4f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%a.val = load <4 x half>, ptr addrspace(1) %a
%b.val = load <4 x half>, ptr addrspace(1) %b
@@ -1111,12 +1111,10 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%b.val = load <4 x half>, ptr addrspace(1) %b
%r.val = call <4 x half> @llvm.minnum.v4f16(<4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, <4 x half> %b.val)
store <4 x half> %r.val, ptr addrspace(1) %r
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
index a467e29d0bff63..93d2bdcd3fca73 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
@@ -405,8 +405,6 @@ define float @v_powi_neg128_f32(float %l) {
; ret double %res
; }
-declare half @llvm.powi.f16.i32(half, i32) #0
-declare float @llvm.powi.f32.i32(float, i32) #0
-declare double @llvm.powi.f64.i32(double, i32) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.powi.f16.i32(half, i32) nounwind readnone speculatable willreturn
+declare float @llvm.powi.f32.i32(float, i32) nounwind readnone speculatable willreturn
+declare double @llvm.powi.f64.i32(double, i32) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
index 7ec7d9bdb400f6..6682aa29a91850 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -44,14 +44,12 @@ main_body:
}
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #0
+declare float @llvm.fabs.f32(float) nounwind readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
index 704e6bb2e04b0a..3efe8f0d590ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
@@ -2,7 +2,7 @@
; EG-LABEL: {{^}}test_group_barrier:
; EG: GROUP_BARRIER
-define amdgpu_kernel void @test_group_barrier(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_group_barrier(ptr addrspace(1) %out) nounwind {
entry:
%tmp = call i32 @llvm.r600.read.tidig.x()
%tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
@@ -18,14 +18,10 @@ entry:
}
; Function Attrs: convergent nounwind
-declare void @llvm.r600.group.barrier() #1
+declare void @llvm.r600.group.barrier() convergent nounwind
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #2
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind readnone }
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
index ab035b9de04b9d..36f9e1ec09d250 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -16,7 +16,7 @@
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.x() #0
+ %0 = call i32 @llvm.r600.read.local.size.x() nounwind readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -31,7 +31,7 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.y() #0
+ %0 = call i32 @llvm.r600.read.local.size.y() nounwind readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -46,7 +46,7 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.local.size.z() #0
+ %0 = call i32 @llvm.r600.read.local.size.z() nounwind readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -59,8 +59,8 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
entry:
- %x = call i32 @llvm.r600.read.local.size.x() #0
- %y = call i32 @llvm.r600.read.local.size.y() #0
+ %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
+ %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
%val = mul i32 %x, %y
store i32 %val, ptr addrspace(1) %out
ret void
@@ -78,8 +78,8 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
entry:
- %x = call i32 @llvm.r600.read.local.size.x() #0
- %z = call i32 @llvm.r600.read.local.size.z() #0
+ %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
+ %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
%val = mul i32 %x, %z
store i32 %val, ptr addrspace(1) %out
ret void
@@ -96,8 +96,8 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
entry:
- %y = call i32 @llvm.r600.read.local.size.y() #0
- %z = call i32 @llvm.r600.read.local.size.z() #0
+ %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
+ %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
%val = mul i32 %y, %z
store i32 %val, ptr addrspace(1) %out
ret void
@@ -117,9 +117,9 @@ entry:
; GCN: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
entry:
- %x = call i32 @llvm.r600.read.local.size.x() #0
- %y = call i32 @llvm.r600.read.local.size.y() #0
- %z = call i32 @llvm.r600.read.local.size.z() #0
+ %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
+ %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
+ %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
%xy = mul i32 %x, %y
%xyz = add i32 %xy, %z
store i32 %xyz, ptr addrspace(1) %out
@@ -134,7 +134,7 @@ entry:
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
entry:
- %size = call i32 @llvm.r600.read.local.size.x() #0
+ %size = call i32 @llvm.r600.read.local.size.x() nounwind readnone
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
store i32 %shr, ptr addrspace(1) %out
@@ -149,7 +149,7 @@ entry:
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
entry:
- %size = call i32 @llvm.r600.read.local.size.y() #0
+ %size = call i32 @llvm.r600.read.local.size.y() nounwind readnone
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
store i32 %shr, ptr addrspace(1) %out
@@ -164,15 +164,13 @@ entry:
; GCN-NEXT: buffer_store_dword [[VVAL]]
define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
entry:
- %size = call i32 @llvm.r600.read.local.size.z() #0
+ %size = call i32 @llvm.r600.read.local.size.z() nounwind readnone
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
store i32 %shr, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
index e670e78954c86a..64403cff4f3088 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
@@ -57,9 +57,7 @@ bb:
}
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
index 58a7771c23a34e..5b67514afd145a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
@@ -8,7 +8,7 @@
; SI: v_rndne_f32_e32
define amdgpu_kernel void @rint_f32(ptr addrspace(1) %out, float %in) {
entry:
- %0 = call float @llvm.rint.f32(float %in) #0
+ %0 = call float @llvm.rint.f32(float %in) nounwind readnone
store float %0, ptr addrspace(1) %out
ret void
}
@@ -21,7 +21,7 @@ entry:
; SI: v_rndne_f32_e32
define amdgpu_kernel void @rint_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
entry:
- %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
+ %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) nounwind readnone
store <2 x float> %0, ptr addrspace(1) %out
ret void
}
@@ -38,13 +38,11 @@ entry:
; SI: v_rndne_f32_e32
define amdgpu_kernel void @rint_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
entry:
- %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
+ %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) nounwind readnone
store <4 x float> %0, ptr addrspace(1) %out
ret void
}
-declare float @llvm.rint.f32(float) #0
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
-
-attributes #0 = { nounwind readnone }
+declare float @llvm.rint.f32(float) nounwind readnone
+declare <2 x float> @llvm.rint.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index ddbc5ef4e5b600..12fdeb7ce78612 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=CI %s
-define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) #0 {
+define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) nounwind {
; SI-LABEL: round_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -60,12 +60,12 @@ define amdgpu_kernel void @round_f64(ptr addrspace(1) %out, double %x) #0 {
; CI-NEXT: s_mov_b32 s5, s1
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT: s_endpgm
- %result = call double @llvm.round.f64(double %x) #1
+ %result = call double @llvm.round.f64(double %x) nounwind readnone
store double %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: v_round_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -129,16 +129,16 @@ define amdgpu_kernel void @v_round_f64(ptr addrspace(1) %out, ptr addrspace(1) %
; CI-NEXT: s_mov_b64 s[2:3], s[6:7]
; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr double, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
%x = load double, ptr addrspace(1) %gep
- %result = call double @llvm.round.f64(double %x) #1
+ %result = call double @llvm.round.f64(double %x) nounwind readnone
store double %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in) #0 {
+define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in) nounwind {
; SI-LABEL: round_v2f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -224,12 +224,12 @@ define amdgpu_kernel void @round_v2f64(ptr addrspace(1) %out, <2 x double> %in)
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: s_endpgm
- %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
+ %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) nounwind readnone
store <2 x double> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in) #0 {
+define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in) nounwind {
; SI-LABEL: round_v4f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
@@ -378,12 +378,12 @@ define amdgpu_kernel void @round_v4f64(ptr addrspace(1) %out, <4 x double> %in)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: s_endpgm
- %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
+ %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) nounwind readnone
store <4 x double> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in) #0 {
+define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in) nounwind {
; SI-LABEL: round_v8f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x19
@@ -656,17 +656,14 @@ define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in)
; CI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:16
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: s_endpgm
- %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
+ %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) nounwind readnone
store <8 x double> %result, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare double @llvm.round.f64(double) #1
-declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
-declare <4 x double> @llvm.round.v4f64(<4 x double>) #1
-declare <8 x double> @llvm.round.v8f64(<8 x double>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare double @llvm.round.f64(double) nounwind readnone
+declare <2 x double> @llvm.round.v2f64(<2 x double>) nounwind readnone
+declare <4 x double> @llvm.round.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.round.v8f64(<8 x double>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index 7ad7cc821c1b56..db222bdb858ee1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GFX11 %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600 %s
-define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) nounwind {
; GFX6-LABEL: round_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s6, s[0:1], 0xb
@@ -97,7 +97,7 @@ define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
; R600-NEXT: ADD T0.X, T0.W, PV.W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %result = call float @llvm.round.f32(float %x) #1
+ %result = call float @llvm.round.f32(float %x) nounwind readnone
store float %result, ptr addrspace(1) %out
ret void
}
@@ -106,7 +106,7 @@ define amdgpu_kernel void @round_f32(ptr addrspace(1) %out, float %x) #0 {
; predict how the scheduler will order the instructions. We already have
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.
-define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) #0 {
+define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) nounwind {
; GFX6-LABEL: round_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -208,12 +208,12 @@ define amdgpu_kernel void @round_v2f32(ptr addrspace(1) %out, <2 x float> %in) #
; R600-NEXT: ADD T0.X, T2.W, PV.W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
+ %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) nounwind readnone
store <2 x float> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) #0 {
+define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) nounwind {
; GFX6-LABEL: round_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -404,12 +404,12 @@ define amdgpu_kernel void @round_v4f32(ptr addrspace(1) %out, <4 x float> %in) #
; R600-NEXT: ADD T4.X, T3.W, PV.W,
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
+ %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) nounwind readnone
store <4 x float> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) #0 {
+define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) nounwind {
; GFX6-LABEL: round_v8f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x11
@@ -677,12 +677,12 @@ define amdgpu_kernel void @round_v8f32(ptr addrspace(1) %out, <8 x float> %in) #
; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; R600-NEXT: LSHR * T2.X, PV.W, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
+ %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) nounwind readnone
store <8 x float> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
+define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) nounwind {
; GFX6-LABEL: round_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -789,13 +789,13 @@ define amdgpu_kernel void @round_f16(ptr addrspace(1) %out, i32 %x.arg) #0 {
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%x.arg.trunc = trunc i32 %x.arg to i16
%x = bitcast i16 %x.arg.trunc to half
- %result = call half @llvm.round.f16(half %x) #1
+ %result = call half @llvm.round.f16(half %x) nounwind readnone
store half %result, ptr addrspace(1) %out
ret void
}
; Should be scalarized
-define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
+define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) nounwind {
; GFX6-LABEL: round_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -951,15 +951,12 @@ define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
ret void
}
-declare float @llvm.round.f32(float) #1
-declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
-declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
-declare <8 x float> @llvm.round.v8f32(<8 x float>) #1
+declare float @llvm.round.f32(float) nounwind readnone
+declare <2 x float> @llvm.round.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.round.v4f32(<4 x float>) nounwind readnone
+declare <8 x float> @llvm.round.v8f32(<8 x float>) nounwind readnone
-declare half @llvm.round.f16(half) #1
-declare <2 x half> @llvm.round.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.round.v4f16(<4 x half>) #1
-declare <8 x half> @llvm.round.v8f16(<8 x half>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.round.f16(half) nounwind readnone
+declare <2 x half> @llvm.round.v2f16(<2 x half>) nounwind readnone
+declare <4 x half> @llvm.round.v4f16(<4 x half>) nounwind readnone
+declare <8 x half> @llvm.round.v8f16(<8 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 576ed270183f65..f46762e41d6d38 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -16,7 +16,7 @@
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%sin = call float @llvm.sin.f32(float %x)
store float %sin, ptr addrspace(1) %out
ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @sin_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -44,7 +44,7 @@ define amdgpu_kernel void @safe_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="true" {
%y = fmul float 3.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -59,7 +59,7 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul reassoc float 3.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -73,7 +73,7 @@ define amdgpu_kernel void @fmf_sin_3x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -88,7 +88,7 @@ define amdgpu_kernel void @safe_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="true" {
%y = fmul float 2.0, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -103,7 +103,7 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(ptr addrspace(1) %out, float %x) #2
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul reassoc float 2.0, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fmf_sin_2x_f32(ptr addrspace(1) %out, float %x) #1 {
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -131,7 +131,7 @@ define amdgpu_kernel void @safe_sin_cancel_f32(ptr addrspace(1) %out, float %x)
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) #2 {
+define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="true" {
%y = fmul float 0x401921FB60000000, %x
%sin = call float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -145,7 +145,7 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(ptr addrspace(1) %out, float %x
; GFX9-NOT: v_fract_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #1 {
+define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) nounwind "unsafe-fp-math"="false" {
%y = fmul reassoc float 0x401921FB60000000, %x
%sin = call reassoc float @llvm.sin.f32(float %y)
store float %sin, ptr addrspace(1) %out
@@ -164,15 +164,11 @@ define amdgpu_kernel void @fmf_sin_cancel_f32(ptr addrspace(1) %out, float %x) #
; GCN: v_sin_f32
; GCN: v_sin_f32
; GCN-NOT: v_sin_f32
-define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1 {
+define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) nounwind "unsafe-fp-math"="false" {
%sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
store <4 x float> %sin, ptr addrspace(1) %out
ret void
}
-declare float @llvm.sin.f32(float) #0
-declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+declare float @llvm.sin.f32(float) nounwind readnone
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll b/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
index cfaefca3a516d7..673d81c084948a 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; FUNC-LABEL: {{^}}constant_load_f64:
-define amdgpu_kernel void @constant_load_f64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_f64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_f64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -62,8 +62,6 @@ define amdgpu_kernel void @constant_load_f64(ptr addrspace(1) %out, ptr addrspac
ret void
}
-attributes #0 = { nounwind }
-
; Tests whether a load-chain of 8 constants of 64bit each gets vectorized into a wider load.
define amdgpu_kernel void @constant_load_2v4f64(ptr addrspace(4) noalias nocapture readonly %weights, ptr addrspace(1) noalias nocapture %out_ptr) {
; GFX6-NOHSA-LABEL: constant_load_2v4f64:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 88b18232ef9c87..f1369ce071f7c3 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,7 +81,7 @@ define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v2i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -154,7 +154,7 @@ define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v3i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -226,7 +226,7 @@ define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v4i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -299,7 +299,7 @@ define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v8i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -372,7 +372,7 @@ define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v16i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -445,7 +445,7 @@ define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v32i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -502,7 +502,7 @@ define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_load_v64i1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -562,7 +562,7 @@ define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -626,7 +626,7 @@ define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_i1_to_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -695,7 +695,7 @@ define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -759,7 +759,7 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v1i1_to_v1i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -828,7 +828,7 @@ define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -904,7 +904,7 @@ define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v2i1_to_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -980,7 +980,7 @@ define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v3i1_to_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1070,7 +1070,7 @@ define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v3i1_to_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1158,7 +1158,7 @@ define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v4i1_to_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1254,7 +1254,7 @@ define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v4i1_to_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1347,7 +1347,7 @@ define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v8i1_to_v8i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1478,7 +1478,7 @@ define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v8i1_to_v8i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1610,7 +1610,7 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v16i1_to_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1824,7 +1824,7 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v16i1_to_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2040,7 +2040,7 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v32i1_to_v32i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2438,7 +2438,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v32i1_to_v32i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2856,7 +2856,7 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v64i1_to_v64i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3614,7 +3614,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v64i1_to_v64i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4401,7 +4401,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_i1_to_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4472,7 +4472,7 @@ define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_i1_to_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4544,7 +4544,7 @@ define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v1i1_to_v1i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4615,7 +4615,7 @@ define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v1i1_to_v1i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4687,7 +4687,7 @@ define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v2i1_to_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4769,7 +4769,7 @@ define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v2i1_to_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4855,7 +4855,7 @@ define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v3i1_to_v3i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4961,7 +4961,7 @@ define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v3i1_to_v3i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5073,7 +5073,7 @@ define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v4i1_to_v4i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5193,7 +5193,7 @@ define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v4i1_to_v4i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5318,7 +5318,7 @@ define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v8i1_to_v8i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -5495,7 +5495,7 @@ define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v8i1_to_v8i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -5688,7 +5688,7 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -5998,7 +5998,7 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v16i1_to_v16i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -6337,7 +6337,7 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v32i1_to_v32i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6899,7 +6899,7 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v32i1_to_v32i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -7577,7 +7577,7 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_zextload_v64i1_to_v64i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -8651,7 +8651,7 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
+define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) nounwind {
; GFX6-LABEL: constant_sextload_v64i1_to_v64i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9973,5 +9973,3 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
store <64 x i64> %ext, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index ab6a9dcf71acef..81a6a8f128fc88 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -543,7 +543,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #0 {
+define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) nounwind {
; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -775,7 +775,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -852,7 +852,7 @@ define amdgpu_kernel void @constant_zextload_i16_to_i32(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @constant_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -930,7 +930,7 @@ define amdgpu_kernel void @constant_sextload_i16_to_i32(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1007,7 +1007,7 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1085,7 +1085,7 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1172,7 +1172,7 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(ptr addrspace(1) %ou
}
; TODO: We should use ASHR instead of LSHR + BFE
-define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1462,7 +1462,7 @@ entry:
; v4i16 is naturally 8 byte aligned
; TODO: This should use LD, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1570,7 +1570,7 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(ptr addrspace(1) %ou
; v4i16 is naturally 8 byte aligned
; TODO: This should use LD, but for some there are redundant MOVs
; TODO: We should use ASHR instead of LSHR + BFE
-define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1679,7 +1679,7 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %ou
; v8i16 is naturally 16 byte aligned
; TODO: These should use LSHR instead of BFE_UINT
; TODO: This should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1839,7 +1839,7 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(ptr addrspace(1) %ou
; v8i16 is naturally 16 byte aligned
; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
; TODO: This should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1998,7 +1998,7 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2262,7 +2262,7 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2530,7 +2530,7 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -3010,7 +3010,7 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -3500,7 +3500,7 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x9
@@ -4423,7 +4423,7 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x9
@@ -5362,7 +5362,7 @@ define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5450,7 +5450,7 @@ define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, p
; t31: i64 = any_extend t28
; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
; TODO: These could be expanded earlier using ASHR 15
-define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5537,7 +5537,7 @@ define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5620,7 +5620,7 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5707,7 +5707,7 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5802,7 +5802,7 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5902,7 +5902,7 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6034,7 +6034,7 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6184,7 +6184,7 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6391,7 +6391,7 @@ define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6639,7 +6639,7 @@ define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -7000,7 +7000,7 @@ define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -7449,7 +7449,7 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -8121,7 +8121,7 @@ define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -8977,18 +8977,16 @@ define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(ptr addrspace(1) %
; These trigger undefined register machine verifier errors
-; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(4) %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
; ret void
; }
-; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(4) %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
; ret void
; }
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
index b0d8f72c22ba7a..61939fab066f49 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
@@ -7,7 +7,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-HSA %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define amdgpu_kernel void @constant_load_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -88,7 +88,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v2i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -174,7 +174,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v3i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -271,7 +271,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v4i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -366,7 +366,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v8i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -498,7 +498,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v9i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v9i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v9i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -660,7 +660,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v10i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v10i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v10i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -828,7 +828,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v11i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v11i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v11i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -1007,7 +1007,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v12i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v12i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v12i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -1184,7 +1184,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v16i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -1392,7 +1392,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_i32_to_i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1477,7 +1477,7 @@ define amdgpu_kernel void @constant_zextload_i32_to_i64(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @constant_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_i32_to_i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1570,7 +1570,7 @@ define amdgpu_kernel void @constant_sextload_i32_to_i64(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v1i32_to_v1i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1655,7 +1655,7 @@ define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v1i32_to_v1i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1748,7 +1748,7 @@ define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v2i32_to_v2i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1846,7 +1846,7 @@ define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v2i32_to_v2i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1960,7 +1960,7 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v4i32_to_v4i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2090,7 +2090,7 @@ define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v4i32_to_v4i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2251,7 +2251,7 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v8i32_to_v8i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -2450,7 +2450,7 @@ define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v8i32_to_v8i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -2718,7 +2718,7 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v16i32_to_v16i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -3198,7 +3198,7 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v16i32_to_v16i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -3541,7 +3541,7 @@ define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v32i32_to_v32i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -4456,7 +4456,7 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v32i32_to_v32i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5095,7 +5095,7 @@ define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_load_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v32i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5465,5 +5465,3 @@ define amdgpu_kernel void @constant_load_v32i32(ptr addrspace(1) %out, ptr addrs
store <32 x i32> %ld, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
index 66c73fda38743f..d418da6d7178ca 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
-define amdgpu_kernel void @constant_load_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -78,7 +78,7 @@ define amdgpu_kernel void @constant_load_i64(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @constant_load_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_v2i64:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -159,7 +159,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v3i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v3i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_v3i64:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -275,7 +275,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_v4i64:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -393,7 +393,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_v8i64:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -585,7 +585,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-LABEL: constant_load_v16i64:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -939,5 +939,3 @@ entry:
store <16 x i64> %ld, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index 952827b8cd0e71..5c71dddd905a5c 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -6,7 +6,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; TODO: NOT AND
-define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -93,7 +93,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v2i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -180,7 +180,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v3i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -295,7 +295,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v4i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v4i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -365,7 +365,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v8i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v8i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -439,7 +439,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_load_v16i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v16i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_load_v16i8:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -520,7 +520,7 @@ entry:
ret void
}
-define amdgpu_kernel void @constant_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -597,7 +597,7 @@ define amdgpu_kernel void @constant_zextload_i8_to_i32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -675,7 +675,7 @@ define amdgpu_kernel void @constant_sextload_i8_to_i32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -752,7 +752,7 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -831,7 +831,7 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(ptr addrspace(1) %out
}
; TODO: This should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -931,7 +931,7 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1031,7 +1031,7 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v3i8_to_v3i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1134,7 +1134,7 @@ entry:
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v3i8_to_v3i32:
; GFX6-NOHSA: ; %bb.0: ; %entry
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1239,7 +1239,7 @@ entry:
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1345,7 +1345,7 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1454,7 +1454,7 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1613,7 +1613,7 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1779,7 +1779,7 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(ptr addrspace(1) %out
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2043,7 +2043,7 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(ptr addrspace(1) %o
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2323,7 +2323,7 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(ptr addrspace(1) %o
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2799,7 +2799,7 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(ptr addrspace(1) %o
}
; TODO: These should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3312,7 +3312,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v64i8_to_v64i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -4223,7 +4223,7 @@ define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v64i8_to_v64i32:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9
@@ -5199,7 +5199,7 @@ define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5283,7 +5283,7 @@ define amdgpu_kernel void @constant_zextload_i8_to_i64(ptr addrspace(1) %out, pt
}
; TODO: Why not 7 ?
-define amdgpu_kernel void @constant_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5370,7 +5370,7 @@ define amdgpu_kernel void @constant_sextload_i8_to_i64(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5452,7 +5452,7 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(ptr addrspace(1) %out
}
; TODO: Why not 7 ?
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5539,7 +5539,7 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5646,7 +5646,7 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5760,7 +5760,7 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5896,7 +5896,7 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6050,7 +6050,7 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6264,7 +6264,7 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6522,7 +6522,7 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6891,7 +6891,7 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -7351,7 +7351,7 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -8043,7 +8043,7 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i64:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -8922,7 +8922,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(4) %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
@@ -8930,14 +8930,14 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(4) %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
; ret void
; }
-define amdgpu_kernel void @constant_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_i8_to_i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9023,7 +9023,7 @@ define amdgpu_kernel void @constant_zextload_i8_to_i16(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_i8_to_i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9111,7 +9111,7 @@ define amdgpu_kernel void @constant_sextload_i8_to_i16(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v1i8_to_v1i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9197,7 +9197,7 @@ define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v1i8_to_v1i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9285,7 +9285,7 @@ define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v2i8_to_v2i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9384,7 +9384,7 @@ define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v2i8_to_v2i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9497,7 +9497,7 @@ define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v4i8_to_v4i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9632,7 +9632,7 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v4i8_to_v4i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9782,7 +9782,7 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v8i8_to_v8i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -9981,7 +9981,7 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v8i8_to_v8i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -10211,7 +10211,7 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v16i8_to_v16i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -10561,7 +10561,7 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v16i8_to_v16i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -10973,7 +10973,7 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_zextload_v32i8_to_v32i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -11621,7 +11621,7 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; GFX6-NOHSA-LABEL: constant_sextload_v32i8_to_v32i16:
; GFX6-NOHSA: ; %bb.0:
; GFX6-NOHSA-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -12397,7 +12397,7 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
}
; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(4) %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(1) %out
@@ -12405,11 +12405,9 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
; }
; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(4) %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(1) %out
; ret void
; }
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
index 7b1355425729e9..51a561f50b0c51 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
@@ -10,7 +10,7 @@
; GCN-HSA: flat_load_dword
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-define amdgpu_kernel void @global_load_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load float, ptr addrspace(1) %in
store float %tmp0, ptr addrspace(1) %out
@@ -22,7 +22,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; R600: VTX_READ_64
-define amdgpu_kernel void @global_load_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <2 x float>, ptr addrspace(1) %in
store <2 x float> %tmp0, ptr addrspace(1) %out
@@ -35,7 +35,7 @@ entry:
; GCNX3-HSA: flat_load_dwordx3
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <3 x float>, ptr addrspace(1) %in
store <3 x float> %tmp0, ptr addrspace(1) %out
@@ -47,7 +47,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <4 x float>, ptr addrspace(1) %in
store <4 x float> %tmp0, ptr addrspace(1) %out
@@ -62,7 +62,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <8 x float>, ptr addrspace(1) %in
store <8 x float> %tmp0, ptr addrspace(1) %out
@@ -80,7 +80,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_32
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v9f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v9f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <9 x float>, ptr addrspace(1) %in
store <9 x float> %tmp0, ptr addrspace(1) %out
@@ -99,7 +99,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v10f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v10f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <10 x float>, ptr addrspace(1) %in
store <10 x float> %tmp0, ptr addrspace(1) %out
@@ -120,7 +120,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v11f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v11f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <11 x float>, ptr addrspace(1) %in
store <11 x float> %tmp0, ptr addrspace(1) %out
@@ -138,7 +138,7 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v12f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v12f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <12 x float>, ptr addrspace(1) %in
store <12 x float> %tmp0, ptr addrspace(1) %out
@@ -160,11 +160,9 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-define amdgpu_kernel void @global_load_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp0 = load <16 x float>, ptr addrspace(1) %in
store <16 x float> %tmp0, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f64.ll b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll
index ed3618dfd64745..0c5a7e2e995bd9 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll
@@ -8,7 +8,7 @@
; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
-define amdgpu_kernel void @global_load_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%ld = load double, ptr addrspace(1) %in
store double %ld, ptr addrspace(1) %out
ret void
@@ -17,7 +17,7 @@ define amdgpu_kernel void @global_load_f64(ptr addrspace(1) %out, ptr addrspace(
; FUNC-LABEL: {{^}}global_load_v2f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @global_load_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <2 x double>, ptr addrspace(1) %in
store <2 x double> %ld, ptr addrspace(1) %out
@@ -29,7 +29,7 @@ entry:
; GCN-NOHSA-DAG: buffer_load_dwordx2
; GCN-HSA-DAG: flat_load_dwordx4
; GCN-HSA-DAG: flat_load_dwordx2
-define amdgpu_kernel void @global_load_v3f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <3 x double>, ptr addrspace(1) %in
store <3 x double> %ld, ptr addrspace(1) %out
@@ -42,7 +42,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @global_load_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <4 x double>, ptr addrspace(1) %in
store <4 x double> %ld, ptr addrspace(1) %out
@@ -59,7 +59,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @global_load_v8f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <8 x double>, ptr addrspace(1) %in
store <8 x double> %ld, ptr addrspace(1) %out
@@ -84,11 +84,9 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @global_load_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <16 x double>, ptr addrspace(1) %in
store <16 x double> %ld, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
index 5ab1f3d972b0a3..e8934b99d961f6 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
@@ -9,56 +9,56 @@
; EG: VTX_READ_8
; EG: AND_INT
-define amdgpu_kernel void @global_load_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load i1, ptr addrspace(1) %in
store i1 %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v2i1:
-define amdgpu_kernel void @global_load_v2i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(1) %in
store <2 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v3i1:
-define amdgpu_kernel void @global_load_v3i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(1) %in
store <3 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v4i1:
-define amdgpu_kernel void @global_load_v4i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(1) %in
store <4 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v8i1:
-define amdgpu_kernel void @global_load_v8i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(1) %in
store <8 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v16i1:
-define amdgpu_kernel void @global_load_v16i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(1) %in
store <16 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v32i1:
-define amdgpu_kernel void @global_load_v32i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v32i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(1) %in
store <32 x i1> %load, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}global_load_v64i1:
-define amdgpu_kernel void @global_load_v64i1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v64i1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(1) %in
store <64 x i1> %load, ptr addrspace(1) %out
ret void
@@ -67,7 +67,7 @@ define amdgpu_kernel void @global_load_v64i1(ptr addrspace(1) %out, ptr addrspac
; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
; GCN: buffer_load_ubyte
; GCN: buffer_store_dword
-define amdgpu_kernel void @global_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i1, ptr addrspace(1) %in
%ext = zext i1 %a to i32
store i32 %ext, ptr addrspace(1) %out
@@ -81,7 +81,7 @@ define amdgpu_kernel void @global_zextload_i1_to_i32(ptr addrspace(1) %out, ptr
; EG: VTX_READ_8
; EG: BFE_INT
-define amdgpu_kernel void @global_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i1, ptr addrspace(1) %in
%ext = sext i1 %a to i32
store i32 %ext, ptr addrspace(1) %out
@@ -89,7 +89,7 @@ define amdgpu_kernel void @global_sextload_i1_to_i32(ptr addrspace(1) %out, ptr
}
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
-define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(1) %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
@@ -97,7 +97,7 @@ define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
-define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(1) %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
@@ -105,7 +105,7 @@ define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
-define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(1) %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
@@ -113,7 +113,7 @@ define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
-define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(1) %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
@@ -121,7 +121,7 @@ define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
-define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(1) %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, ptr addrspace(1) %out
@@ -129,7 +129,7 @@ define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
-define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(1) %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, ptr addrspace(1) %out
@@ -137,7 +137,7 @@ define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
-define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(1) %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(1) %out
@@ -145,7 +145,7 @@ define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
-define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(1) %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(1) %out
@@ -153,7 +153,7 @@ define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
-define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(1) %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(1) %out
@@ -161,7 +161,7 @@ define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
-define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(1) %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(1) %out
@@ -169,7 +169,7 @@ define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
-define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(1) %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
@@ -177,7 +177,7 @@ define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
-define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(1) %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
@@ -185,7 +185,7 @@ define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
-define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(1) %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
@@ -193,7 +193,7 @@ define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
-define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(1) %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
@@ -201,7 +201,7 @@ define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
-define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(1) %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
@@ -209,7 +209,7 @@ define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
-define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(1) %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
@@ -221,7 +221,7 @@ define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(ptr addrspace(1) %out
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @global_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i1, ptr addrspace(1) %in
%ext = zext i1 %a to i64
store i64 %ext, ptr addrspace(1) %out
@@ -233,7 +233,7 @@ define amdgpu_kernel void @global_zextload_i1_to_i64(ptr addrspace(1) %out, ptr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @global_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i1, ptr addrspace(1) %in
%ext = sext i1 %a to i64
store i64 %ext, ptr addrspace(1) %out
@@ -241,7 +241,7 @@ define amdgpu_kernel void @global_sextload_i1_to_i64(ptr addrspace(1) %out, ptr
}
; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
-define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(1) %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(1) %out
@@ -249,7 +249,7 @@ define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
-define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(1) %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(1) %out
@@ -257,7 +257,7 @@ define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
-define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(1) %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(1) %out
@@ -265,7 +265,7 @@ define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
-define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(1) %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(1) %out
@@ -273,7 +273,7 @@ define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
-define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(1) %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, ptr addrspace(1) %out
@@ -281,7 +281,7 @@ define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
-define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(1) %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, ptr addrspace(1) %out
@@ -289,7 +289,7 @@ define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
-define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(1) %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(1) %out
@@ -297,7 +297,7 @@ define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
-define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(1) %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(1) %out
@@ -305,7 +305,7 @@ define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
-define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(1) %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
@@ -313,7 +313,7 @@ define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
-define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(1) %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
@@ -321,7 +321,7 @@ define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(ptr addrspace(1) %out,
}
; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
-define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(1) %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
@@ -329,7 +329,7 @@ define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
-define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(1) %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
@@ -337,7 +337,7 @@ define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
-define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(1) %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(1) %out
@@ -345,7 +345,7 @@ define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
-define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(1) %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(1) %out
@@ -353,7 +353,7 @@ define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
-define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(1) %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(1) %out
@@ -361,11 +361,9 @@ define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(ptr addrspace(1) %out
}
; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
-define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(1) %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 21e27bfa75531d..6d3abd55809f77 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -619,7 +619,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v16i16_align2(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @global_load_v16i16_align2(ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2:
; GCN-NOHSA-SI: ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -801,7 +801,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -886,7 +886,7 @@ define amdgpu_kernel void @global_zextload_i16_to_i32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i16_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -974,7 +974,7 @@ define amdgpu_kernel void @global_sextload_i16_to_i32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1059,7 +1059,7 @@ define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1147,7 +1147,7 @@ define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1246,7 +1246,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(ptr addrspace(1) %out,
}
; TODO: This should use ASHR instead of LSHR + BFE
-define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1572,7 +1572,7 @@ entry:
}
; TODO: This should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1686,7 +1686,7 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(ptr addrspace(1) %out,
; TODO: We should use ASHR instead of LSHR + BFE
; TODO: This should use DST, but for some there are redundant MOVs
-define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1804,7 +1804,7 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out,
}
; TODO: These should use LSHR instead of BFE_UINT
-define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1953,7 +1953,7 @@ define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(ptr addrspace(1) %out,
}
; TODO: These should use ASHR instead of LSHR + BFE_INT
-define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2105,7 +2105,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2341,7 +2341,7 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2588,7 +2588,7 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2999,7 +2999,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -3443,7 +3443,7 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
@@ -4257,7 +4257,7 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -5123,7 +5123,7 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5220,7 +5220,7 @@ define amdgpu_kernel void @global_zextload_i16_to_i64(ptr addrspace(1) %out, ptr
; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
; TODO: These could be expanded earlier using ASHR 15
-define amdgpu_kernel void @global_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5315,7 +5315,7 @@ define amdgpu_kernel void @global_sextload_i16_to_i64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5407,7 +5407,7 @@ define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(ptr addrspace(1) %out,
}
; TODO: These could be expanded earlier using ASHR 15
-define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5502,7 +5502,7 @@ define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5610,7 +5610,7 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5724,7 +5724,7 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -5868,7 +5868,7 @@ define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -6019,7 +6019,7 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -6237,7 +6237,7 @@ define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -6466,7 +6466,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -6844,7 +6844,7 @@ define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -7240,7 +7240,7 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
@@ -7962,7 +7962,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -8707,18 +8707,16 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
ret void
}
-; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(1) %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
; ret void
; }
-; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(1) %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
; ret void
; }
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
index 0f9cc33d731f12..12da370c6874cc 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
@@ -6,7 +6,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX900-HSA %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-HSA -check-prefix=GCN-GFX908-HSA %s
-define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -85,7 +85,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v2i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -164,7 +164,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v3i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -249,7 +249,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v4i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -328,7 +328,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v8i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -431,7 +431,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v9i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -562,7 +562,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v10i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -691,7 +691,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v11i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -827,7 +827,7 @@ entry:
}
-define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v12i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -957,7 +957,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v16i32:
; SI-NOHSA: ; %bb.0: ; %entry
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1112,7 +1112,7 @@ entry:
ret void
}
-define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_i32_to_i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1195,7 +1195,7 @@ define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_i32_to_i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1279,7 +1279,7 @@ define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v1i32_to_v1i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1362,7 +1362,7 @@ define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v1i32_to_v1i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1446,7 +1446,7 @@ define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v2i32_to_v2i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1544,7 +1544,7 @@ define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v2i32_to_v2i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1641,7 +1641,7 @@ define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v4i32_to_v4i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1766,7 +1766,7 @@ define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v4i32_to_v4i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1899,7 +1899,7 @@ define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v8i32_to_v8i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2088,7 +2088,7 @@ define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v8i32_to_v8i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2300,7 +2300,7 @@ define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v16i32_to_v16i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2671,7 +2671,7 @@ define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v16i32_to_v16i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -2988,7 +2988,7 @@ define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_sextload_v32i32_to_v32i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
@@ -3808,7 +3808,7 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_zextload_v32i32_to_v32i64:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -4386,7 +4386,7 @@ define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-NOHSA-LABEL: global_load_v32i32:
; SI-NOHSA: ; %bb.0:
; SI-NOHSA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4635,5 +4635,3 @@ define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspa
store <32 x i32> %ld, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i64.ll b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll
index 61ccc17d59eb00..cfb622e46f6a71 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll
@@ -13,7 +13,7 @@
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
; EG: VTX_READ_64
-define amdgpu_kernel void @global_load_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%ld = load i64, ptr addrspace(1) %in
store i64 %ld, ptr addrspace(1) %out
ret void
@@ -24,7 +24,7 @@ define amdgpu_kernel void @global_load_i64(ptr addrspace(1) %out, ptr addrspace(
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128
-define amdgpu_kernel void @global_load_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <2 x i64>, ptr addrspace(1) %in
store <2 x i64> %ld, ptr addrspace(1) %out
@@ -40,7 +40,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @global_load_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <3 x i64>, ptr addrspace(1) %in
store <3 x i64> %ld, ptr addrspace(1) %out
@@ -56,7 +56,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @global_load_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <4 x i64>, ptr addrspace(1) %in
store <4 x i64> %ld, ptr addrspace(1) %out
@@ -78,7 +78,7 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @global_load_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <8 x i64>, ptr addrspace(1) %in
store <8 x i64> %ld, ptr addrspace(1) %out
@@ -112,11 +112,9 @@ entry:
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
-define amdgpu_kernel void @global_load_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <16 x i64>, ptr addrspace(1) %in
store <16 x i64> %ld, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index 6ed99f7074b641..20d4d39f7fb371 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -11,7 +11,7 @@
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
-define amdgpu_kernel void @global_load_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load i8, ptr addrspace(1) %in
store i8 %ld, ptr addrspace(1) %out
@@ -23,7 +23,7 @@ entry:
; GCN-HSA: flat_load_ushort v
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_load_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <2 x i8>, ptr addrspace(1) %in
store <2 x i8> %ld, ptr addrspace(1) %out
@@ -35,7 +35,7 @@ entry:
; GCN-HSA: flat_load_dword v
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_load_v3i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v3i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(1) %in
store <3 x i8> %ld, ptr addrspace(1) %out
@@ -47,7 +47,7 @@ entry:
; GCN-HSA: flat_load_dword v
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_load_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <4 x i8>, ptr addrspace(1) %in
store <4 x i8> %ld, ptr addrspace(1) %out
@@ -59,7 +59,7 @@ entry:
; GCN-HSA: flat_load_dwordx2
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_load_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <8 x i8>, ptr addrspace(1) %in
store <8 x i8> %ld, ptr addrspace(1) %out
@@ -72,7 +72,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_load_v16i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_load_v16i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <16 x i8>, ptr addrspace(1) %in
store <16 x i8> %ld, ptr addrspace(1) %out
@@ -84,7 +84,7 @@ entry:
; GCN-HSA: flat_load_ubyte
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i8, ptr addrspace(1) %in
%ext = zext i8 %a to i32
store i32 %ext, ptr addrspace(1) %out
@@ -98,7 +98,7 @@ define amdgpu_kernel void @global_zextload_i8_to_i32(ptr addrspace(1) %out, ptr
; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define amdgpu_kernel void @global_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%ld = load i8, ptr addrspace(1) %in
%ext = sext i8 %ld to i32
store i32 %ext, ptr addrspace(1) %out
@@ -108,7 +108,7 @@ define amdgpu_kernel void @global_sextload_i8_to_i32(ptr addrspace(1) %out, ptr
; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
@@ -120,7 +120,7 @@ define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(ptr addrspace(1) %out,
; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
-define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(1) %out
@@ -135,7 +135,7 @@ define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(ptr addrspace(1) %out,
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
@@ -152,7 +152,7 @@ define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(ptr addrspace(1) %out,
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(1) %out
@@ -174,7 +174,7 @@ define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(ptr addrspace(1) %out,
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(1) %in
%ext = zext <3 x i8> %ld to <3 x i32>
@@ -207,7 +207,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(1) %in
%ext = sext <3 x i8> %ld to <3 x i32>
@@ -227,7 +227,7 @@ entry:
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(1) %out
@@ -248,7 +248,7 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(ptr addrspace(1) %out,
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(1) %out
@@ -273,7 +273,7 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(ptr addrspace(1) %out,
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(1) %out
@@ -300,7 +300,7 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(ptr addrspace(1) %out,
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(1) %out
@@ -341,7 +341,7 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(ptr addrspace(1) %out,
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
@@ -384,7 +384,7 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(ptr addrspace(1) %out
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(1) %out
@@ -456,7 +456,7 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(ptr addrspace(1) %out
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
@@ -532,7 +532,7 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(ptr addrspace(1) %out
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
-define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(1) %out
@@ -545,7 +545,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i8>, ptr addrspace(1) %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
@@ -558,7 +558,7 @@ define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
-define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <64 x i8>, ptr addrspace(1) %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(1) %out
@@ -576,7 +576,7 @@ define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(ptr addrspace(1) %out
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @global_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i8, ptr addrspace(1) %in
%ext = zext i8 %a to i64
store i64 %ext, ptr addrspace(1) %out
@@ -595,7 +595,7 @@ define amdgpu_kernel void @global_zextload_i8_to_i64(ptr addrspace(1) %out, ptr
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define amdgpu_kernel void @global_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i8, ptr addrspace(1) %in
%ext = sext i8 %a to i64
store i64 %ext, ptr addrspace(1) %out
@@ -606,7 +606,7 @@ define amdgpu_kernel void @global_sextload_i8_to_i64(ptr addrspace(1) %out, ptr
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
-define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(1) %out
@@ -619,7 +619,7 @@ define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(ptr addrspace(1) %out,
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
-define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(1) %out
@@ -629,7 +629,7 @@ define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(1) %out
@@ -639,7 +639,7 @@ define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(1) %out
@@ -649,7 +649,7 @@ define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(1) %out
@@ -659,7 +659,7 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(1) %out
@@ -669,7 +669,7 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
@@ -679,7 +679,7 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(1) %out
@@ -689,7 +689,7 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
@@ -699,7 +699,7 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(ptr addrspace(1) %out
; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(1) %out
@@ -710,7 +710,7 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(1) %out
@@ -721,7 +721,7 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(1) %out
@@ -729,7 +729,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
}
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(1) %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
@@ -737,7 +737,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; }
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(1) %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(1) %out
@@ -752,7 +752,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(ptr addrspace(1) %out
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i8, ptr addrspace(1) %in
%ext = zext i8 %a to i16
store i16 %ext, ptr addrspace(1) %out
@@ -768,7 +768,7 @@ define amdgpu_kernel void @global_zextload_i8_to_i16(ptr addrspace(1) %out, ptr
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_i8_to_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i8, ptr addrspace(1) %in
%ext = sext i8 %a to i16
store i16 %ext, ptr addrspace(1) %out
@@ -778,7 +778,7 @@ define amdgpu_kernel void @global_sextload_i8_to_i16(ptr addrspace(1) %out, ptr
; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, ptr addrspace(1) %out
@@ -789,7 +789,7 @@ define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(ptr addrspace(1) %out,
; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(1) %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, ptr addrspace(1) %out
@@ -799,7 +799,7 @@ define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, ptr addrspace(1) %out
@@ -811,7 +811,7 @@ define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(ptr addrspace(1) %out,
; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(1) %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, ptr addrspace(1) %out
@@ -821,7 +821,7 @@ define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -835,7 +835,7 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out,
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -845,7 +845,7 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -863,7 +863,7 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out,
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -873,7 +873,7 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
-define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -899,7 +899,7 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -910,7 +910,7 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
-define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
@@ -953,7 +953,7 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
@@ -961,7 +961,7 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out
}
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(1) %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(1) %out
@@ -969,11 +969,9 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out
; }
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(1) %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(1) %out
; ret void
; }
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
index 0c61c58ef06192..76c321f89efe95 100644
--- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX803 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900-FLATSCR %s
-define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %in) #0 {
+define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %in) nounwind {
; GFX900-LABEL: load_local_lo_hi_v2i16_multi_use_lo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,7 +71,7 @@ entry:
ret <2 x i16> %build1
}
-define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %in) #0 {
+define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(ptr addrspace(3) noalias %in) nounwind {
; GFX900-LABEL: load_local_lo_hi_v2i16_multi_use_hi:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -138,7 +138,7 @@ entry:
ret <2 x i16> %build1
}
-define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lohi(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out0, ptr addrspace(3) noalias %out1) #0 {
+define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lohi(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out0, ptr addrspace(3) noalias %out1) nounwind {
; GFX900-LABEL: load_local_lo_hi_v2i16_multi_use_lohi:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -206,7 +206,7 @@ entry:
ret <2 x i16> %build1
}
-define <2 x i16> @load_local_hi_v2i16_undeflo(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_hi_v2i16_undeflo(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_undeflo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -243,7 +243,7 @@ entry:
ret <2 x i16> %build
}
-define <2 x i16> @load_local_hi_v2i16_reglo(ptr addrspace(3) %in, i16 %reg) #0 {
+define <2 x i16> @load_local_hi_v2i16_reglo(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_reglo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -285,7 +285,7 @@ entry:
ret <2 x i16> %build1
}
-define void @load_local_hi_v2i16_reglo_vreg(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_hi_v2i16_reglo_vreg(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -334,7 +334,7 @@ entry:
ret void
}
-define <2 x i16> @load_local_hi_v2i16_zerolo(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_hi_v2i16_zerolo(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_zerolo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -376,7 +376,7 @@ entry:
}
; FIXME: Remove m0 initialization
-define i32 @load_local_hi_v2i16_zerolo_shift(ptr addrspace(3) %in) #0 {
+define i32 @load_local_hi_v2i16_zerolo_shift(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_zerolo_shift:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -416,7 +416,7 @@ entry:
ret i32 %shift
}
-define void @load_local_hi_v2f16_reglo_vreg(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_hi_v2f16_reglo_vreg(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -465,7 +465,7 @@ entry:
ret void
}
-define void @load_local_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -515,7 +515,7 @@ entry:
ret void
}
-define void @load_local_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -565,7 +565,7 @@ entry:
ret void
}
-define void @load_local_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -617,7 +617,7 @@ entry:
ret void
}
-define void @load_local_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-LABEL: load_local_hi_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -669,7 +669,7 @@ entry:
ret void
}
-define void @load_global_hi_v2i16_reglo_vreg(ptr addrspace(1) %in, i16 %reg) #0 {
+define void @load_global_hi_v2i16_reglo_vreg(ptr addrspace(1) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -720,7 +720,7 @@ entry:
ret void
}
-define void @load_global_hi_v2f16_reglo_vreg(ptr addrspace(1) %in, half %reg) #0 {
+define void @load_global_hi_v2f16_reglo_vreg(ptr addrspace(1) %in, half %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -771,7 +771,7 @@ entry:
ret void
}
-define void @load_global_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(1) %in, i16 %reg) #0 {
+define void @load_global_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(1) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -823,7 +823,7 @@ entry:
ret void
}
-define void @load_global_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(1) %in, i16 %reg) #0 {
+define void @load_global_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(1) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -875,7 +875,7 @@ entry:
ret void
}
-define void @load_global_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(1) %in, half %reg) #0 {
+define void @load_global_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(1) %in, half %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -928,7 +928,7 @@ entry:
ret void
}
-define void @load_global_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(1) %in, half %reg) #0 {
+define void @load_global_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(1) %in, half %reg) nounwind {
; GFX900-LABEL: load_global_hi_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -981,7 +981,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2i16_reglo_vreg(ptr %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg(ptr %in, i16 %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1029,7 +1029,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2f16_reglo_vreg(ptr %in, half %reg) #0 {
+define void @load_flat_hi_v2f16_reglo_vreg(ptr %in, half %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1077,7 +1077,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2i16_reglo_vreg_zexti8(ptr %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_zexti8(ptr %in, i16 %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1126,7 +1126,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2i16_reglo_vreg_sexti8(ptr %in, i16 %reg) #0 {
+define void @load_flat_hi_v2i16_reglo_vreg_sexti8(ptr %in, i16 %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1175,7 +1175,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2f16_reglo_vreg_zexti8(ptr %in, half %reg) #0 {
+define void @load_flat_hi_v2f16_reglo_vreg_zexti8(ptr %in, half %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1225,7 +1225,7 @@ entry:
ret void
}
-define void @load_flat_hi_v2f16_reglo_vreg_sexti8(ptr %in, half %reg) #0 {
+define void @load_flat_hi_v2f16_reglo_vreg_sexti8(ptr %in, half %reg) nounwind {
; GFX900-LABEL: load_flat_hi_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1275,7 +1275,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg(ptr addrspace(5) byval(i16) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg(ptr addrspace(5) byval(i16) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1324,7 +1324,7 @@ entry:
ret void
}
-define void @load_private_hi_v2f16_reglo_vreg(ptr addrspace(5) byval(half) %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg(ptr addrspace(5) byval(half) %in, half %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1373,7 +1373,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_nooff(ptr addrspace(5) byval(i16) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff(ptr addrspace(5) byval(i16) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_nooff:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1422,7 +1422,7 @@ entry:
ret void
}
-define void @load_private_hi_v2f16_reglo_vreg_nooff(ptr addrspace(5) %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff(ptr addrspace(5) %in, half %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2f16_reglo_vreg_nooff:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1471,7 +1471,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1521,7 +1521,7 @@ entry:
ret void
}
-define void @load_private_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, half %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1572,7 +1572,7 @@ entry:
ret void
}
-define void @load_private_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, half %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1623,7 +1623,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1673,7 +1673,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_nooff_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1723,7 +1723,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(ptr addrspace(5) %in, i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(ptr addrspace(5) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_nooff_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1773,7 +1773,7 @@ entry:
ret void
}
-define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, half %reg) #0 {
+define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, half %reg) nounwind {
; GFX900-LABEL: load_private_hi_v2f16_reglo_vreg_nooff_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1824,7 +1824,7 @@ entry:
ret void
}
-define void @load_constant_hi_v2i16_reglo_vreg(ptr addrspace(4) %in, i16 %reg) #0 {
+define void @load_constant_hi_v2i16_reglo_vreg(ptr addrspace(4) %in, i16 %reg) nounwind {
; GFX900-LABEL: load_constant_hi_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1875,7 +1875,7 @@ entry:
ret void
}
-define void @load_constant_hi_v2f16_reglo_vreg(ptr addrspace(4) %in, half %reg) #0 {
+define void @load_constant_hi_v2f16_reglo_vreg(ptr addrspace(4) %in, half %reg) nounwind {
; GFX900-LABEL: load_constant_hi_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1926,7 +1926,7 @@ entry:
ret void
}
-define void @load_constant_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(4) %in, half %reg) #0 {
+define void @load_constant_hi_v2f16_reglo_vreg_sexti8(ptr addrspace(4) %in, half %reg) nounwind {
; GFX900-LABEL: load_constant_hi_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1979,7 +1979,7 @@ entry:
ret void
}
-define void @load_constant_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(4) %in, half %reg) #0 {
+define void @load_constant_hi_v2f16_reglo_vreg_zexti8(ptr addrspace(4) %in, half %reg) nounwind {
; GFX900-LABEL: load_constant_hi_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2035,7 +2035,7 @@ entry:
; Local object gives known offset, so requires converting from offen
; to offset variant.
-define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg, ptr addrspace(5) %obj0) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg, ptr addrspace(5) %obj0) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_to_offset:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2098,7 +2098,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg, ptr addrspace(5) %obj0) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg, ptr addrspace(5) %obj0) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_sexti8_to_offset:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2162,7 +2162,7 @@ entry:
ret void
}
-define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg, ptr addrspace(5) %obj0) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg, ptr addrspace(5) %obj0) nounwind {
; GFX900-LABEL: load_private_hi_v2i16_reglo_vreg_zexti8_to_offset:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2228,7 +2228,7 @@ entry:
; FIXME: Remove m0 init and waitcnt between reads
; FIXME: Is there a cost to using the extload over not?
-define <2 x i16> @load_local_v2i16_split_multi_chain(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_v2i16_split_multi_chain(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_v2i16_split_multi_chain:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2278,7 +2278,7 @@ entry:
ret <2 x i16> %build1
}
-define <2 x i16> @load_local_lo_hi_v2i16_samechain(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_lo_hi_v2i16_samechain(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_lo_hi_v2i16_samechain:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2330,7 +2330,7 @@ entry:
}
; FIXME: Remove and
-define <2 x i16> @load_local_v2i16_broadcast(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_v2i16_broadcast(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_v2i16_broadcast:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2375,7 +2375,7 @@ entry:
ret <2 x i16> %build1
}
-define <2 x i16> @load_local_lo_hi_v2i16_side_effect(ptr addrspace(3) %in, ptr addrspace(3) %may.alias) #0 {
+define <2 x i16> @load_local_lo_hi_v2i16_side_effect(ptr addrspace(3) %in, ptr addrspace(3) %may.alias) nounwind {
; GFX900-LABEL: load_local_lo_hi_v2i16_side_effect:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2435,7 +2435,7 @@ entry:
}
; FIXME: Remove waitcnt between reads
-define <2 x i16> @load_global_v2i16_split(ptr addrspace(1) %in) #0 {
+define <2 x i16> @load_global_v2i16_split(ptr addrspace(1) %in) nounwind {
; GFX900-LABEL: load_global_v2i16_split:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2489,7 +2489,7 @@ entry:
}
; FIXME: Remove waitcnt between reads
-define <2 x i16> @load_flat_v2i16_split(ptr %in) #0 {
+define <2 x i16> @load_flat_v2i16_split(ptr %in) nounwind {
; GFX900-LABEL: load_flat_v2i16_split:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2544,7 +2544,7 @@ entry:
}
; FIXME: Remove waitcnt between reads
-define <2 x i16> @load_constant_v2i16_split(ptr addrspace(4) %in) #0 {
+define <2 x i16> @load_constant_v2i16_split(ptr addrspace(4) %in) nounwind {
; GFX900-LABEL: load_constant_v2i16_split:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2597,7 +2597,7 @@ entry:
; FIXME: Remove m0 init and waitcnt between reads
; FIXME: Is there a cost to using the extload over not?
-define <2 x i16> @load_private_v2i16_split(ptr addrspace(5) byval(i16) %in) #0 {
+define <2 x i16> @load_private_v2i16_split(ptr addrspace(5) byval(i16) %in) nounwind {
; GFX900-LABEL: load_private_v2i16_split:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2649,7 +2649,7 @@ entry:
; FIXME: This test should work without copying of v0.
; ds_read_u16_d16_hi preserves low 16 bits of the destination
; and ds_write_b16 only reads low 16 bits.
-define <2 x i16> @load_local_hi_v2i16_store_local_lo(i16 %reg, ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_hi_v2i16_store_local_lo(i16 %reg, ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_hi_v2i16_store_local_lo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2703,5 +2703,3 @@ entry:
store volatile i16 %reg, ptr addrspace(3) %in
ret <2 x i16> %build1
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-input-fold.ll b/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
index a243e6f5387228..8d847cf43b6c68 100644
--- a/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
@@ -95,13 +95,9 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #3
+declare float @llvm.pow.f32(float, float) nounwind readonly
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #1 = { readnone }
-attributes #2 = { readonly }
-attributes #3 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 3ef86c13e150ac..c90fd00bc0b861 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s
-define <2 x i16> @load_local_lo_v2i16_undeflo(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_lo_v2i16_undeflo(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_lo_v2i16_undeflo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -32,7 +32,7 @@ entry:
ret <2 x i16> %build
}
-define <2 x i16> @load_local_lo_v2i16_reglo(ptr addrspace(3) %in, i16 %reg) #0 {
+define <2 x i16> @load_local_lo_v2i16_reglo(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -77,7 +77,7 @@ entry:
}
; Show that we get reasonable regalloc without physreg constraints.
-define void @load_local_lo_v2i16_reglo_vreg(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_lo_v2i16_reglo_vreg(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -130,7 +130,7 @@ entry:
ret void
}
-define <2 x i16> @load_local_lo_v2i16_zerolo(ptr addrspace(3) %in) #0 {
+define <2 x i16> @load_local_lo_v2i16_zerolo(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_lo_v2i16_zerolo:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -161,7 +161,7 @@ entry:
ret <2 x i16> %build
}
-define <2 x half> @load_local_lo_v2f16_fpimm(ptr addrspace(3) %in) #0 {
+define <2 x half> @load_local_lo_v2f16_fpimm(ptr addrspace(3) %in) nounwind {
; GFX900-LABEL: load_local_lo_v2f16_fpimm:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -195,7 +195,7 @@ entry:
ret <2 x half> %build
}
-define void @load_local_lo_v2f16_reghi_vreg(ptr addrspace(3) %in, i32 %reg) #0 {
+define void @load_local_lo_v2f16_reghi_vreg(ptr addrspace(3) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_local_lo_v2f16_reghi_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -235,7 +235,7 @@ entry:
ret void
}
-define void @load_local_lo_v2f16_reglo_vreg(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_lo_v2f16_reglo_vreg(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -288,7 +288,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reghi_vreg_zexti8(ptr addrspace(3) %in, i32 %reg) #0 {
+define void @load_local_lo_v2i16_reghi_vreg_zexti8(ptr addrspace(3) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,7 +329,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg_zexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -383,7 +383,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reghi_vreg_sexti8(ptr addrspace(3) %in, i32 %reg) #0 {
+define void @load_local_lo_v2i16_reghi_vreg_sexti8(ptr addrspace(3) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -424,7 +424,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(3) %in, i16 %reg) #0 {
+define void @load_local_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(3) %in, i16 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reglo_vreg_sexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -478,7 +478,7 @@ entry:
ret void
}
-define void @load_local_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg_zexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -533,7 +533,7 @@ entry:
ret void
}
-define void @load_local_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(3) %in, half %reg) #0 {
+define void @load_local_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(3) %in, half %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2f16_reglo_vreg_sexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -588,7 +588,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <2 x i16> %reg) #0 {
+define void @load_local_lo_v2i16_reghi_vreg_multi_use_lo(ptr addrspace(3) %in, <2 x i16> %reg) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lo:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -650,7 +650,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <2 x i16> %reg) #0 {
+define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(ptr addrspace(3) %in, <2 x i16> %reg) nounwind {
; GFX900-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_hi:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -700,7 +700,7 @@ entry:
ret void
}
-define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(ptr addrspace(3) noalias %in, <2 x i16> %reg, ptr addrspace(3) noalias %out0, ptr addrspace(3) noalias %out1) #0 {
+define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(ptr addrspace(3) noalias %in, <2 x i16> %reg, ptr addrspace(3) noalias %out0, ptr addrspace(3) noalias %out1) nounwind {
; GFX900-MUBUF-LABEL: load_local_lo_v2i16_reghi_vreg_multi_use_lohi:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -767,7 +767,7 @@ entry:
ret void
}
-define void @load_global_lo_v2i16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2i16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -809,7 +809,7 @@ entry:
ret void
}
-define void @load_global_lo_v2f16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2f16_reglo_vreg(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -851,7 +851,7 @@ entry:
ret void
}
-define void @load_global_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -894,7 +894,7 @@ entry:
ret void
}
-define void @load_global_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -937,7 +937,7 @@ entry:
ret void
}
-define void @load_global_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -981,7 +981,7 @@ entry:
ret void
}
-define void @load_global_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) #0 {
+define void @load_global_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(1) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_global_lo_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1025,7 +1025,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2i16_reghi_vreg(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reghi_vreg(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2i16_reghi_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1064,7 +1064,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2f16_reghi_vreg(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2f16_reghi_vreg(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2f16_reghi_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1105,7 +1105,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2i16_reglo_vreg_zexti8(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_zexti8(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1145,7 +1145,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2i16_reglo_vreg_sexti8(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2i16_reglo_vreg_sexti8(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2i16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1185,7 +1185,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2f16_reglo_vreg_zexti8(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2f16_reglo_vreg_zexti8(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1226,7 +1226,7 @@ entry:
ret void
}
-define void @load_flat_lo_v2f16_reglo_vreg_sexti8(ptr %in, i32 %reg) #0 {
+define void @load_flat_lo_v2f16_reglo_vreg_sexti8(ptr %in, i32 %reg) nounwind {
; GFX900-LABEL: load_flat_lo_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1267,7 +1267,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg(ptr addrspace(5) byval(i16) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg(ptr addrspace(5) byval(i16) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1316,7 +1316,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reghi_vreg(ptr addrspace(5) byval(i16) %in, i16 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg(ptr addrspace(5) byval(i16) %in, i16 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1369,7 +1369,7 @@ entry:
ret void
}
-define void @load_private_lo_v2f16_reglo_vreg(ptr addrspace(5) byval(half) %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg(ptr addrspace(5) byval(half) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1418,7 +1418,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1467,7 +1467,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reghi_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reghi_vreg_nooff(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reghi_vreg_nooff:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1516,7 +1516,7 @@ entry:
ret void
}
-define void @load_private_lo_v2f16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1565,7 +1565,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_zexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1615,7 +1615,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_sexti8(ptr addrspace(5) byval(i8) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1665,7 +1665,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1715,7 +1715,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1765,7 +1765,7 @@ entry:
ret void
}
-define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(ptr addrspace(5) %in, i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1816,7 +1816,7 @@ entry:
ret void
}
-define void @load_constant_lo_v2i16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) #0 {
+define void @load_constant_lo_v2i16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_constant_lo_v2i16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1858,7 +1858,7 @@ entry:
ret void
}
-define void @load_constant_lo_v2f16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) #0 {
+define void @load_constant_lo_v2f16_reglo_vreg(ptr addrspace(4) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1900,7 +1900,7 @@ entry:
ret void
}
-define void @load_constant_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(4) %in, i32 %reg) #0 {
+define void @load_constant_lo_v2f16_reglo_vreg_zexti8(ptr addrspace(4) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_zexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1944,7 +1944,7 @@ entry:
ret void
}
-define void @load_constant_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(4) %in, i32 %reg) #0 {
+define void @load_constant_lo_v2f16_reglo_vreg_sexti8(ptr addrspace(4) %in, i32 %reg) nounwind {
; GFX900-LABEL: load_constant_lo_v2f16_reglo_vreg_sexti8:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1988,7 +1988,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_to_offset:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2056,7 +2056,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2125,7 +2125,7 @@ entry:
ret void
}
-define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
+define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2194,7 +2194,7 @@ entry:
ret void
}
-define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2264,7 +2264,7 @@ entry:
ret void
}
-define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
+define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) nounwind {
; GFX900-MUBUF-LABEL: load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2333,5 +2333,3 @@ entry:
store <2 x half> %build1, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f32-no-ds128.ll b/llvm/test/CodeGen/AMDGPU/load-local-f32-no-ds128.ll
index a018fcf9a1d62c..ea8a436668ecbb 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-f32-no-ds128.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f32-no-ds128.ll
@@ -16,5 +16,3 @@ define amdgpu_kernel void @local_v4f32_to_2b64(ptr addrspace(3) %out, ptr addrsp
store <4 x float> %ld, ptr addrspace(3) %out, align 16
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
index 61b11678c7ed5f..6d5dab08c7a699 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
@@ -13,7 +13,7 @@
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define amdgpu_kernel void @load_f32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @load_f32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load float, ptr addrspace(3) %in
store float %tmp0, ptr addrspace(1) %out
@@ -28,7 +28,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @load_v2f32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @load_v2f32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load <2 x float>, ptr addrspace(3) %in
store <2 x float> %tmp0, ptr addrspace(1) %out
@@ -51,7 +51,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v3f32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3f32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load <3 x float>, ptr addrspace(3) %in
store <3 x float> %tmp0, ptr addrspace(3) %out
@@ -68,7 +68,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v4f32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4f32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load <4 x float>, ptr addrspace(3) %in
store <4 x float> %tmp0, ptr addrspace(3) %out
@@ -90,7 +90,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v8f32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8f32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load <8 x float>, ptr addrspace(3) %in
store <8 x float> %tmp0, ptr addrspace(3) %out
@@ -122,7 +122,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v16f32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16f32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = load <16 x float>, ptr addrspace(3) %in
store <16 x float> %tmp0, ptr addrspace(3) %out
@@ -147,5 +147,3 @@ define amdgpu_kernel void @local_v4f32_to_128(ptr addrspace(3) %out, ptr addrspa
store <4 x float> %ld, ptr addrspace(3) %out, align 16
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
index 96b11071c6be53..0a94e54834135f 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
@@ -17,7 +17,7 @@
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load double, ptr addrspace(3) %in
store double %ld, ptr addrspace(3) %out
ret void
@@ -33,7 +33,7 @@ define amdgpu_kernel void @local_load_f64(ptr addrspace(3) %out, ptr addrspace(3
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v2f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v2f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <2 x double>, ptr addrspace(3) %in
store <2 x double> %ld, ptr addrspace(3) %out
@@ -53,7 +53,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v3f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x double>, ptr addrspace(3) %in
store <3 x double> %ld, ptr addrspace(3) %out
@@ -76,7 +76,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v4f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <4 x double>, ptr addrspace(3) %in
store <4 x double> %ld, ptr addrspace(3) %out
@@ -108,7 +108,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v8f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <8 x double>, ptr addrspace(3) %in
store <8 x double> %ld, ptr addrspace(3) %out
@@ -167,7 +167,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v16f64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16f64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <16 x double>, ptr addrspace(3) %in
store <16 x double> %ld, ptr addrspace(3) %out
@@ -190,5 +190,3 @@ entry:
store <2 x double> %ld, ptr addrspace(3) %out, align 16
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
index ea858fb67443d1..7c7f733fef6202 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -14,7 +14,7 @@
; EG: LDS_UBYTE_READ_RET
; EG: AND_INT
; EG: LDS_BYTE_WRITE
-define amdgpu_kernel void @local_load_i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load i1, ptr addrspace(3) %in
store i1 %load, ptr addrspace(3) %out
ret void
@@ -23,7 +23,7 @@ define amdgpu_kernel void @local_load_i1(ptr addrspace(3) %out, ptr addrspace(3)
; FUNC-LABEL: {{^}}local_load_v2i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v2i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v2i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(3) %in
store <2 x i1> %load, ptr addrspace(3) %out
ret void
@@ -32,7 +32,7 @@ define amdgpu_kernel void @local_load_v2i1(ptr addrspace(3) %out, ptr addrspace(
; FUNC-LABEL: {{^}}local_load_v3i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v3i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(3) %in
store <3 x i1> %load, ptr addrspace(3) %out
ret void
@@ -41,7 +41,7 @@ define amdgpu_kernel void @local_load_v3i1(ptr addrspace(3) %out, ptr addrspace(
; FUNC-LABEL: {{^}}local_load_v4i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v4i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(3) %in
store <4 x i1> %load, ptr addrspace(3) %out
ret void
@@ -50,7 +50,7 @@ define amdgpu_kernel void @local_load_v4i1(ptr addrspace(3) %out, ptr addrspace(
; FUNC-LABEL: {{^}}local_load_v8i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v8i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(3) %in
store <8 x i1> %load, ptr addrspace(3) %out
ret void
@@ -59,7 +59,7 @@ define amdgpu_kernel void @local_load_v8i1(ptr addrspace(3) %out, ptr addrspace(
; FUNC-LABEL: {{^}}local_load_v16i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v16i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(3) %in
store <16 x i1> %load, ptr addrspace(3) %out
ret void
@@ -68,7 +68,7 @@ define amdgpu_kernel void @local_load_v16i1(ptr addrspace(3) %out, ptr addrspace
; FUNC-LABEL: {{^}}local_load_v32i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v32i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v32i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(3) %in
store <32 x i1> %load, ptr addrspace(3) %out
ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @local_load_v32i1(ptr addrspace(3) %out, ptr addrspace
; FUNC-LABEL: {{^}}local_load_v64i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_load_v64i1(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v64i1(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(3) %in
store <64 x i1> %load, ptr addrspace(3) %out
ret void
@@ -89,7 +89,7 @@ define amdgpu_kernel void @local_load_v64i1(ptr addrspace(3) %out, ptr addrspace
; GCN: ds_read_u8
; GCN: ds_write_b32
-define amdgpu_kernel void @local_zextload_i1_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i1_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i1, ptr addrspace(3) %in
%ext = zext i1 %a to i32
store i32 %ext, ptr addrspace(3) %out
@@ -106,7 +106,7 @@ define amdgpu_kernel void @local_zextload_i1_to_i32(ptr addrspace(3) %out, ptr a
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define amdgpu_kernel void @local_sextload_i1_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i1_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i1, ptr addrspace(3) %in
%ext = sext i1 %a to i32
store i32 %ext, ptr addrspace(3) %out
@@ -116,7 +116,7 @@ define amdgpu_kernel void @local_sextload_i1_to_i32(ptr addrspace(3) %out, ptr a
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(3) %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -126,7 +126,7 @@ define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(3) %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -136,7 +136,7 @@ define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(3) %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -146,7 +146,7 @@ define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(3) %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -156,7 +156,7 @@ define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(3) %in
%ext = zext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, ptr addrspace(3) %out
@@ -166,7 +166,7 @@ define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(3) %in
%ext = sext <3 x i1> %load to <3 x i32>
store <3 x i32> %ext, ptr addrspace(3) %out
@@ -176,7 +176,7 @@ define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(3) %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -186,7 +186,7 @@ define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(3) %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -196,7 +196,7 @@ define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(3) %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -206,7 +206,7 @@ define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(3) %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -216,7 +216,7 @@ define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(3) %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -226,7 +226,7 @@ define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(3) %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -236,7 +236,7 @@ define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(3) %in
%ext = zext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -246,7 +246,7 @@ define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(3) %in
%ext = sext <32 x i1> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -256,7 +256,7 @@ define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(3) %in
%ext = zext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -266,7 +266,7 @@ define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(3) %in
%ext = sext <64 x i1> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -280,7 +280,7 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(ptr addrspace(3) %out,
; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: ds_write_b64
-define amdgpu_kernel void @local_zextload_i1_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i1_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i1, ptr addrspace(3) %in
%ext = zext i1 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -295,7 +295,7 @@ define amdgpu_kernel void @local_zextload_i1_to_i64(ptr addrspace(3) %out, ptr a
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; GCN: ds_write_b64
-define amdgpu_kernel void @local_sextload_i1_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i1_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i1, ptr addrspace(3) %in
%ext = sext i1 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -305,7 +305,7 @@ define amdgpu_kernel void @local_sextload_i1_to_i64(ptr addrspace(3) %out, ptr a
; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(3) %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -315,7 +315,7 @@ define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i1>, ptr addrspace(3) %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -325,7 +325,7 @@ define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(3) %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -335,7 +335,7 @@ define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i1>, ptr addrspace(3) %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -345,7 +345,7 @@ define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(3) %in
%ext = zext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, ptr addrspace(3) %out
@@ -355,7 +355,7 @@ define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <3 x i1>, ptr addrspace(3) %in
%ext = sext <3 x i1> %load to <3 x i64>
store <3 x i64> %ext, ptr addrspace(3) %out
@@ -365,7 +365,7 @@ define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(3) %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -375,7 +375,7 @@ define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i1>, ptr addrspace(3) %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -385,7 +385,7 @@ define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(3) %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -395,7 +395,7 @@ define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i1>, ptr addrspace(3) %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -405,7 +405,7 @@ define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(ptr addrspace(3) %out, p
; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(3) %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -415,7 +415,7 @@ define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i1>, ptr addrspace(3) %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -425,7 +425,7 @@ define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(3) %in
%ext = zext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -435,7 +435,7 @@ define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i1>, ptr addrspace(3) %in
%ext = sext <32 x i1> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -445,7 +445,7 @@ define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(3) %in
%ext = zext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(3) %out
@@ -455,11 +455,9 @@ define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(ptr addrspace(3) %out,
; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i1>, ptr addrspace(3) %in
%ext = sext <64 x i1> %load to <64 x i64>
store <64 x i64> %ext, ptr addrspace(3) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 1dd08c561b2ab4..6a060789f7bcfd 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -128,7 +128,7 @@ entry:
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_zextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i16, ptr addrspace(3) %in
%ext = zext i16 %a to i32
store i32 %ext, ptr addrspace(3) %out
@@ -150,7 +150,7 @@ define amdgpu_kernel void @local_zextload_i16_to_i32(ptr addrspace(3) %out, ptr
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_sextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i16_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i16, ptr addrspace(3) %in
%ext = sext i16 %a to i32
store i32 %ext, ptr addrspace(3) %out
@@ -168,7 +168,7 @@ define amdgpu_kernel void @local_sextload_i16_to_i32(ptr addrspace(3) %out, ptr
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i16>, ptr addrspace(3) %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -188,7 +188,7 @@ define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(ptr addrspace(3) %out,
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i16>, ptr addrspace(3) %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -203,7 +203,7 @@ define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(ptr addrspace(3) %out,
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i16>, ptr addrspace(3) %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -220,7 +220,7 @@ define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: BFE_INT
; EG: BFE_INT
-define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i16>, ptr addrspace(3) %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -281,7 +281,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(3) %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -301,7 +301,7 @@ define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(ptr addrspace(3)
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(3) %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -318,7 +318,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i16>, ptr addrspace(3) %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -343,7 +343,7 @@ define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(ptr addrspace(3) %out,
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i16>, ptr addrspace(3) %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -370,7 +370,7 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i16>, ptr addrspace(3) %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -409,7 +409,7 @@ define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(ptr addrspace(3) %out
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i16>, ptr addrspace(3) %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -441,7 +441,7 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(ptr addrspace(3) %out
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i16>, ptr addrspace(3) %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -481,7 +481,7 @@ define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(ptr addrspace(3) %out
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i16>, ptr addrspace(3) %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -549,7 +549,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(ptr addrspace(3) %out
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i16>, ptr addrspace(3) %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -592,7 +592,7 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i16>, ptr addrspace(3) %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -613,7 +613,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i16, ptr addrspace(3) %in
%ext = zext i16 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -643,7 +643,7 @@ define amdgpu_kernel void @local_zextload_i16_to_i64(ptr addrspace(3) %out, ptr
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_sextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i16_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i16, ptr addrspace(3) %in
%ext = sext i16 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -660,7 +660,7 @@ define amdgpu_kernel void @local_sextload_i16_to_i64(ptr addrspace(3) %out, ptr
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i16>, ptr addrspace(3) %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -680,7 +680,7 @@ define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(ptr addrspace(3) %out,
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
-define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i16>, ptr addrspace(3) %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -693,7 +693,7 @@ define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i16>, ptr addrspace(3) %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -708,7 +708,7 @@ define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: ASHR
-define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i16>, ptr addrspace(3) %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -722,7 +722,7 @@ define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(3) %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -740,7 +740,7 @@ define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(3) %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -756,7 +756,7 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i16>, ptr addrspace(3) %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -780,7 +780,7 @@ define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i16>, ptr addrspace(3) %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -800,7 +800,7 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i16>, ptr addrspace(3) %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -836,7 +836,7 @@ define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(ptr addrspace(3) %out
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i16>, ptr addrspace(3) %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -864,7 +864,7 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i16>, ptr addrspace(3) %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -924,7 +924,7 @@ define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(ptr addrspace(3) %out
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
-define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i16>, ptr addrspace(3) %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -932,7 +932,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
}
; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
-; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(3) %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(3) %out
@@ -940,7 +940,7 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; }
; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
-; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i16>, ptr addrspace(3) %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(3) %out
@@ -965,5 +965,3 @@ define amdgpu_kernel void @local_v8i16_to_128(ptr addrspace(3) %out, ptr addrspa
store <8 x i16> %ld, ptr addrspace(3) %out, align 16
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
index c445d2b6ac489d..713841fbaedbd6 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
@@ -16,7 +16,7 @@
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load i32, ptr addrspace(3) %in
store i32 %ld, ptr addrspace(3) %out
@@ -28,7 +28,7 @@ entry:
; GFX9-NOT: m0
; GCN: ds_read_b64
-define amdgpu_kernel void @local_load_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <2 x i32>, ptr addrspace(3) %in
store <2 x i32> %ld, ptr addrspace(3) %out
@@ -42,7 +42,7 @@ entry:
; SI-DAG: ds_read_b64
; SI-DAG: ds_read_b32
; CIVI-DAG: ds_read_b96
-define amdgpu_kernel void @local_load_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x i32>, ptr addrspace(3) %in
store <3 x i32> %ld, ptr addrspace(3) %out
@@ -55,7 +55,7 @@ entry:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @local_load_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <4 x i32>, ptr addrspace(3) %in
store <4 x i32> %ld, ptr addrspace(3) %out
@@ -68,7 +68,7 @@ entry:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @local_load_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <8 x i32>, ptr addrspace(3) %in
store <8 x i32> %ld, ptr addrspace(3) %out
@@ -87,7 +87,7 @@ entry:
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
-define amdgpu_kernel void @local_load_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <16 x i32>, ptr addrspace(3) %in
store <16 x i32> %ld, ptr addrspace(3) %out
@@ -98,7 +98,7 @@ entry:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_i32_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i32_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load i32, ptr addrspace(3) %in
%ext = zext i32 %ld to i64
store i64 %ext, ptr addrspace(3) %out
@@ -109,7 +109,7 @@ define amdgpu_kernel void @local_zextload_i32_to_i64(ptr addrspace(3) %out, ptr
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_i32_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i32_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load i32, ptr addrspace(3) %in
%ext = sext i32 %ld to i64
store i64 %ext, ptr addrspace(3) %out
@@ -120,7 +120,7 @@ define amdgpu_kernel void @local_sextload_i32_to_i64(ptr addrspace(3) %out, ptr
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <1 x i32>, ptr addrspace(3) %in
%ext = zext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -131,7 +131,7 @@ define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <1 x i32>, ptr addrspace(3) %in
%ext = sext <1 x i32> %ld to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -142,7 +142,7 @@ define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <2 x i32>, ptr addrspace(3) %in
%ext = zext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -153,7 +153,7 @@ define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <2 x i32>, ptr addrspace(3) %in
%ext = sext <2 x i32> %ld to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -164,7 +164,7 @@ define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <4 x i32>, ptr addrspace(3) %in
%ext = zext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -175,7 +175,7 @@ define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <4 x i32>, ptr addrspace(3) %in
%ext = sext <4 x i32> %ld to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -205,7 +205,7 @@ define amdgpu_kernel void @local_v4i32_to_128(ptr addrspace(3) %out, ptr addrspa
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <8 x i32>, ptr addrspace(3) %in
%ext = zext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -216,7 +216,7 @@ define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <8 x i32>, ptr addrspace(3) %in
%ext = sext <8 x i32> %ld to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -227,7 +227,7 @@ define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(ptr addrspace(3) %out,
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <16 x i32>, ptr addrspace(3) %in
%ext = sext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -238,7 +238,7 @@ define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(ptr addrspace(3) %out
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <16 x i32>, ptr addrspace(3) %in
%ext = zext <16 x i32> %ld to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -249,7 +249,7 @@ define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(ptr addrspace(3) %out
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <32 x i32>, ptr addrspace(3) %in
%ext = sext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -260,7 +260,7 @@ define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(ptr addrspace(3) %out
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
-define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <32 x i32>, ptr addrspace(3) %in
%ext = zext <32 x i32> %ld to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -272,10 +272,8 @@ define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(ptr addrspace(3) %out
; GFX9-NOT: m0
; GFX9-NOT: accvgpr
-define amdgpu_kernel void @local_load_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load <32 x i32>, ptr addrspace(3) %in
store <32 x i32> %ld, ptr addrspace(3) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
index fe33f292993a2c..964878b087686a 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
@@ -17,7 +17,7 @@
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load i64, ptr addrspace(3) %in
store i64 %ld, ptr addrspace(3) %out
ret void
@@ -33,7 +33,7 @@ define amdgpu_kernel void @local_load_i64(ptr addrspace(3) %out, ptr addrspace(3
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <2 x i64>, ptr addrspace(3) %in
store <2 x i64> %ld, ptr addrspace(3) %out
@@ -66,7 +66,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x i64>, ptr addrspace(3) %in
store <3 x i64> %ld, ptr addrspace(3) %out
@@ -89,7 +89,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <4 x i64>, ptr addrspace(3) %in
store <4 x i64> %ld, ptr addrspace(3) %out
@@ -121,7 +121,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <8 x i64>, ptr addrspace(3) %in
store <8 x i64> %ld, ptr addrspace(3) %out
@@ -180,11 +180,9 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <16 x i64>, ptr addrspace(3) %in
store <16 x i64> %ld, ptr addrspace(3) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
index 9b1b32a65f2338..ae32c27cf757aa 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
@@ -14,7 +14,7 @@
; GCN: ds_read_u8
; EG: LDS_UBYTE_READ_RET
-define amdgpu_kernel void @local_load_i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load i8, ptr addrspace(3) %in
store i8 %ld, ptr addrspace(3) %out
@@ -28,7 +28,7 @@ entry:
; GCN: ds_read_u16
; EG: LDS_USHORT_READ_RET
-define amdgpu_kernel void @local_load_v2i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v2i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <2 x i8>, ptr addrspace(3) %in
store <2 x i8> %ld, ptr addrspace(3) %out
@@ -40,7 +40,7 @@ entry:
; GCN: ds_read_b32
; EG: DS_READ_RET
-define amdgpu_kernel void @local_load_v3i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v3i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(3) %in
store <3 x i8> %ld, ptr addrspace(3) %out
@@ -52,7 +52,7 @@ entry:
; GCN: ds_read_b32
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v4i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v4i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <4 x i8>, ptr addrspace(3) %in
store <4 x i8> %ld, ptr addrspace(3) %out
@@ -65,7 +65,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v8i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v8i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <8 x i8>, ptr addrspace(3) %in
store <8 x i8> %ld, ptr addrspace(3) %out
@@ -81,7 +81,7 @@ entry:
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_load_v16i8(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_v16i8(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <16 x i8>, ptr addrspace(3) %in
store <16 x i8> %ld, ptr addrspace(3) %out
@@ -95,7 +95,7 @@ entry:
; GCN: ds_read_u8
; EG: LDS_UBYTE_READ_RET
-define amdgpu_kernel void @local_zextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i8, ptr addrspace(3) %in
%ext = zext i8 %a to i32
store i32 %ext, ptr addrspace(3) %out
@@ -110,7 +110,7 @@ define amdgpu_kernel void @local_zextload_i8_to_i32(ptr addrspace(3) %out, ptr a
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define amdgpu_kernel void @local_sextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%ld = load i8, ptr addrspace(3) %in
%ext = sext i8 %ld to i32
store i32 %ext, ptr addrspace(3) %out
@@ -120,7 +120,7 @@ define amdgpu_kernel void @local_sextload_i8_to_i32(ptr addrspace(3) %out, ptr a
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
; EG: LDS_UBYTE_READ_RET
-define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -132,7 +132,7 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(ptr addrspace(3) %out, p
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
-define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, ptr addrspace(3) %out
@@ -144,7 +144,7 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(ptr addrspace(3) %out, p
; GCN: ds_read_u16
; EG: LDS_USHORT_READ_RET
-define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -171,7 +171,7 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(ptr addrspace(3) %out, p
; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, ptr addrspace(3) %out
@@ -188,7 +188,7 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(ptr addrspace(3) %out, p
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(3) %in
%ext = zext <3 x i8> %ld to <3 x i32>
@@ -216,7 +216,7 @@ entry:
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
entry:
%ld = load <3 x i8>, ptr addrspace(3) %in
%ext = sext <3 x i8> %ld to <3 x i32>
@@ -234,7 +234,7 @@ entry:
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -252,7 +252,7 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(ptr addrspace(3) %out, p
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, ptr addrspace(3) %out
@@ -271,7 +271,7 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(ptr addrspace(3) %out, p
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -292,7 +292,7 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(ptr addrspace(3) %out, p
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, ptr addrspace(3) %out
@@ -319,7 +319,7 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(ptr addrspace(3) %out, p
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
-define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -350,7 +350,7 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(ptr addrspace(3) %out,
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, ptr addrspace(3) %out
@@ -369,7 +369,7 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(ptr addrspace(3) %out,
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = zext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -388,7 +388,7 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(ptr addrspace(3) %out,
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = sext <32 x i8> %load to <32 x i32>
store <32 x i32> %ext, ptr addrspace(3) %out
@@ -415,7 +415,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(ptr addrspace(3) %out,
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i8>, ptr addrspace(3) %in
%ext = zext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -442,7 +442,7 @@ define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(ptr addrspace(3) %out,
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <64 x i8>, ptr addrspace(3) %in
%ext = sext <64 x i8> %load to <64 x i32>
store <64 x i32> %ext, ptr addrspace(3) %out
@@ -460,7 +460,7 @@ define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(ptr addrspace(3) %out,
; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; EG: 0.0
-define amdgpu_kernel void @local_zextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i8, ptr addrspace(3) %in
%ext = zext i8 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -480,7 +480,7 @@ define amdgpu_kernel void @local_zextload_i8_to_i64(ptr addrspace(3) %out, ptr a
; EG: ASHR
; TODO: why not 7?
; EG: 31
-define amdgpu_kernel void @local_sextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i8, ptr addrspace(3) %in
%ext = sext i8 %a to i64
store i64 %ext, ptr addrspace(3) %out
@@ -495,7 +495,7 @@ define amdgpu_kernel void @local_sextload_i8_to_i64(ptr addrspace(3) %out, ptr a
; EG: MOV {{.*}}, literal
; TODO: merge?
; EG: 0.0
-define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -510,7 +510,7 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(ptr addrspace(3) %out, p
; EG: ASHR
; TODO: why not 7?
; EG: 31
-define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, ptr addrspace(3) %out
@@ -522,7 +522,7 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(ptr addrspace(3) %out, p
; GFX9-NOT: m0
; EG: LDS_USHORT_READ_RET
-define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -536,7 +536,7 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(ptr addrspace(3) %out, p
; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
-define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, ptr addrspace(3) %out
@@ -548,7 +548,7 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(ptr addrspace(3) %out, p
; GFX9-NOT: m0
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -560,7 +560,7 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(ptr addrspace(3) %out, p
; GFX9-NOT: m0
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, ptr addrspace(3) %out
@@ -573,7 +573,7 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(ptr addrspace(3) %out, p
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -595,7 +595,7 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(ptr addrspace(3) %out, p
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
-define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, ptr addrspace(3) %out
@@ -610,7 +610,7 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(ptr addrspace(3) %out, p
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -625,7 +625,7 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, ptr addrspace(3) %out
@@ -644,7 +644,7 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = zext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -663,7 +663,7 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(ptr addrspace(3) %out,
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = sext <32 x i8> %load to <32 x i64>
store <32 x i64> %ext, ptr addrspace(3) %out
@@ -671,7 +671,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out,
}
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(3) %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(3) %out
@@ -679,7 +679,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out,
; }
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
-; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(3) %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, ptr addrspace(3) %out
@@ -694,7 +694,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(ptr addrspace(3) %out,
; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
-define amdgpu_kernel void @local_zextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i8, ptr addrspace(3) %in
%ext = zext i8 %a to i16
store i16 %ext, ptr addrspace(3) %out
@@ -710,7 +710,7 @@ define amdgpu_kernel void @local_zextload_i8_to_i16(ptr addrspace(3) %out, ptr a
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
-define amdgpu_kernel void @local_sextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_i8_to_i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%a = load i8, ptr addrspace(3) %in
%ext = sext i8 %a to i16
store i16 %ext, ptr addrspace(3) %out
@@ -723,7 +723,7 @@ define amdgpu_kernel void @local_sextload_i8_to_i16(ptr addrspace(3) %out, ptr a
; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
-define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = zext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, ptr addrspace(3) %out
@@ -737,7 +737,7 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(ptr addrspace(3) %out, p
; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
-define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <1 x i8>, ptr addrspace(3) %in
%ext = sext <1 x i8> %load to <1 x i16>
store <1 x i16> %ext, ptr addrspace(3) %out
@@ -750,7 +750,7 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(ptr addrspace(3) %out, p
; EG: LDS_USHORT_READ_RET
; EG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = zext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, ptr addrspace(3) %out
@@ -765,7 +765,7 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(ptr addrspace(3) %out, p
; EG: BFE_INT
; EG: BFE_INT
; EG: LDS_WRITE
-define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <2 x i8>, ptr addrspace(3) %in
%ext = sext <2 x i8> %load to <2 x i16>
store <2 x i16> %ext, ptr addrspace(3) %out
@@ -779,7 +779,7 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(ptr addrspace(3) %out, p
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(3) %out
@@ -798,7 +798,7 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(ptr addrspace(3) %out, p
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(3) %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(3) %out
@@ -815,7 +815,7 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(ptr addrspace(3) %out, p
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(3) %out
@@ -841,7 +841,7 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(ptr addrspace(3) %out, p
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <8 x i8>, ptr addrspace(3) %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(3) %out
@@ -864,7 +864,7 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(ptr addrspace(3) %out, p
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(3) %out
@@ -904,7 +904,7 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(ptr addrspace(3) %out,
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <16 x i8>, ptr addrspace(3) %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(3) %out
@@ -939,7 +939,7 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(ptr addrspace(3) %out,
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(3) %out
@@ -1003,7 +1003,7 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(ptr addrspace(3) %out,
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
%load = load <32 x i8>, ptr addrspace(3) %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(3) %out
@@ -1011,7 +1011,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(ptr addrspace(3) %out,
}
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(3) %in
; %ext = zext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(3) %out
@@ -1019,7 +1019,7 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(ptr addrspace(3) %out,
; }
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
-; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) #0 {
+; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(ptr addrspace(3) %out, ptr addrspace(3) %in) nounwind {
; %load = load <64 x i8>, ptr addrspace(3) %in
; %ext = sext <64 x i8> %load to <64 x i16>
; store <64 x i16> %ext, ptr addrspace(3) %out
@@ -1044,5 +1044,3 @@ define amdgpu_kernel void @local_v16i8_to_128(ptr addrspace(3) %out, ptr addrspa
store <16 x i8> %ld, ptr addrspace(3) %out, align 16
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/load-weird-sizes.ll
index 15ab2d730ab77c..8df4ed04cb5e96 100644
--- a/llvm/test/CodeGen/AMDGPU/load-weird-sizes.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-weird-sizes.ll
@@ -8,7 +8,7 @@
; SI-DAG: {{flat|buffer}}_load_ubyte
; SI-DAG: {{flat|buffer}}_load_ushort
; SI: {{flat|buffer}}_store_dword
-define amdgpu_kernel void @load_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @load_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%1 = load i24, ptr addrspace(1) %in
%2 = zext i24 %1 to i32
store i32 %2, ptr addrspace(1) %out
@@ -21,11 +21,9 @@ define amdgpu_kernel void @load_i24(ptr addrspace(1) %out, ptr addrspace(1) %in)
; CI-HSA: flat_load_dword [[VAL:v[0-9]+]]
; CI-HSA: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAL]]
-define amdgpu_kernel void @load_i25(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @load_i25(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%1 = load i25, ptr addrspace(1) %in
%2 = zext i25 %1 to i32
store i32 %2, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
index 8386a685a1a120..7752ba19b760d0 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
@@ -4,7 +4,7 @@
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) nounwind {
; GCN-LABEL: local_memory:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -25,7 +25,7 @@ define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
entry:
- %y.i = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %y.i = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%arrayidx = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %y.i
store i32 %y.i, ptr addrspace(3) %arrayidx, align 4
%add = add nsw i32 %y.i, 1
@@ -43,7 +43,7 @@ entry:
@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
; Check that the LDS size emitted correctly
-define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) nounwind {
; SI-LABEL: local_memory_two_objects:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -105,9 +105,5 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.ll b/llvm/test/CodeGen/AMDGPU/local-memory.ll
index 9e3180904cc243..738f959baa9115 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.ll
@@ -14,7 +14,7 @@
; GCN: ds_read_b32 v{{[0-9]+}}, v[[PTR]] offset:4
; R600: LDS_READ_RET
-define amdgpu_kernel void @load_i32_local_const_ptr(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @load_i32_local_const_ptr(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
entry:
%tmp0 = getelementptr [512 x i32], ptr addrspace(3) @lds, i32 0, i32 1
%tmp1 = load i32, ptr addrspace(3) %tmp0
@@ -30,7 +30,7 @@ entry:
; R600: LDS_READ_RET
; GCN-DAG: ds_read_b32
; GCN-DAG: ds_read2_b32
-define amdgpu_kernel void @load_i32_v2i32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @load_i32_v2i32_local(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
%scalar = load i32, ptr addrspace(3) %in
%vec_ptr = getelementptr <2 x i32>, ptr addrspace(3) %in, i32 2
%vec0 = load <2 x i32>, ptr addrspace(3) %vec_ptr, align 4
@@ -39,5 +39,3 @@ define amdgpu_kernel void @load_i32_v2i32_local(ptr addrspace(1) %out, ptr addrs
store <2 x i32> %vec, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
index 7db27f5338fed8..122f34595d9cea 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
@@ -15,9 +15,9 @@
; EG-NEXT: ALU clause
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) nounwind {
entry:
- %y.i = call i32 @llvm.r600.read.tidig.x() #1
+ %y.i = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%arrayidx = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %y.i
store i32 %y.i, ptr addrspace(3) %arrayidx, align 4
%add = add nsw i32 %y.i, 1
@@ -57,9 +57,9 @@ entry:
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) nounwind {
entry:
- %x.i = call i32 @llvm.r600.read.tidig.x() #1
+ %x.i = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
store i32 %x.i, ptr addrspace(3) %arrayidx, align 4
%mul = shl nsw i32 %x.i, 1
@@ -79,9 +79,5 @@ entry:
ret void
}
-declare i32 @llvm.r600.read.tidig.x() #1
-declare void @llvm.r600.group.barrier() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { convergent nounwind }
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare void @llvm.r600.group.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 52f97150e4b301..63d523bfbeeb0e 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -325,6 +325,4 @@ entry:
ret void
}
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn writeonly }
+declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) argmemonly nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
index cc90d03e667157..d3054df8646b08 100644
--- a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
@@ -6,12 +6,12 @@
; Used to emit an always 4 byte instruction. Inline asm always assumes
; each instruction is the maximum size.
-declare void @llvm.amdgcn.s.sleep(i32) #0
+declare void @llvm.amdgcn.s.sleep(i32) nounwind
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) nounwind {
; GCN-LABEL: uniform_conditional_max_short_forward_branch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -43,7 +43,7 @@ bb2:
call void asm sideeffect
"v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
call void @llvm.amdgcn.s.sleep(i32 0)
br label %bb3
@@ -52,7 +52,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) nounwind {
; GCN-LABEL: uniform_conditional_min_long_forward_branch:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -91,7 +91,7 @@ bb2:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -99,7 +99,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) #0 {
+define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) nounwind {
; GCN-LABEL: uniform_conditional_min_long_forward_vcnd_branch:
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -139,7 +139,7 @@ bb2:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -147,7 +147,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) nounwind {
; GCN-LABEL: min_long_forward_vbranch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -198,7 +198,7 @@ bb2:
v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb3
bb3:
@@ -206,7 +206,7 @@ bb3:
ret void
}
-define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) nounwind {
; GCN-LABEL: long_backward_sbranch:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s0, 0
@@ -239,7 +239,7 @@ bb2:
call void asm sideeffect
"v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
%inc = add nsw i32 %loop.idx, 1 ; add cost 4
%cmp = icmp slt i32 %inc, 10 ; condition cost = 8
br i1 %cmp, label %bb2, label %bb3 ; -
@@ -319,13 +319,10 @@ bb3:
"v_nop_e64
v_nop_e64
v_nop_e64
- v_nop_e64", ""() #0
+ v_nop_e64", ""() nounwind
br label %bb4
bb4:
store volatile i32 63, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/loop-address.ll b/llvm/test/CodeGen/AMDGPU/loop-address.ll
index cd7297ece25ed3..0d95ed2330e998 100644
--- a/llvm/test/CodeGen/AMDGPU/loop-address.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-address.ll
@@ -5,7 +5,7 @@
;CHECK: LOOP_BREAK @10
;CHECK: POP @10
-define amdgpu_kernel void @loop_ge(ptr addrspace(1) nocapture %out, i32 %iterations) #0 {
+define amdgpu_kernel void @loop_ge(ptr addrspace(1) nocapture %out, i32 %iterations) nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" {
entry:
%cmp5 = icmp sgt i32 %iterations, 0
br i1 %cmp5, label %for.body, label %for.end
@@ -24,8 +24,6 @@ for.end: ; preds = %for.body, %entry
ret void
}
-attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
-
!opencl.kernels = !{!0, !1, !2, !3}
!0 = !{ptr @loop_ge}
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 634390ba33caf8..1c05f1a0e12ffb 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -4,7 +4,7 @@
; Uses llvm.amdgcn.break
-define amdgpu_kernel void @break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @break_loop(i32 %arg) nounwind {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -85,7 +85,7 @@ bb9:
ret void
}
-define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -177,7 +177,7 @@ bb9: ; preds = %Flow
; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
-define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -266,7 +266,7 @@ bb9: ; preds = %Flow
ret void
}
-define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -355,7 +355,7 @@ bb9: ; preds = %Flow
ret void
}
-define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -447,7 +447,7 @@ bb9: ; preds = %Flow
; Swap order of branches in flow block so that the true phi is
; continue.
-define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -538,7 +538,4 @@ bb9: ; preds = %Flow
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index a407cd20bf7624..4a13800eb88528 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -146,6 +146,4 @@ loopexit:
ret void
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind writeonly }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
index 69387e67c1c7ea..7218bb85f7f1a3 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
@@ -83,9 +83,7 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare void @llvm.dbg.value(metadata, metadata, metadata) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!llvm.dbg.cu = !{!0}
!llvm.debugify = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 7408d3776ae220..1e4cb8a77ce161 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -11,7 +11,7 @@ define amdgpu_kernel void @kern_noargs() {
ret void
}
-define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
+define amdgpu_kernel void @kern_i8(i8 %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_i8(
; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 0
@@ -32,7 +32,7 @@ define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
+define amdgpu_kernel void @kern_i16(i16 %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_i16(
; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 0
@@ -53,7 +53,7 @@ define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_f16(half %arg) #0 {
+define amdgpu_kernel void @kern_f16(half %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_f16(
; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 0
@@ -76,7 +76,7 @@ define amdgpu_kernel void @kern_f16(half %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
+define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_zeroext_i8(
; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
@@ -97,7 +97,7 @@ define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
+define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_zeroext_i16(
; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
@@ -118,7 +118,7 @@ define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
+define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_signext_i8(
; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
@@ -139,7 +139,7 @@ define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
+define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_signext_i16(
; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
@@ -295,7 +295,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
ret void
}
-define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
+define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_v8i32(
; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
@@ -314,7 +314,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
+define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_v8i64(
; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
@@ -333,7 +333,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
ret void
}
-define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
+define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_v16i64(
; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
@@ -402,7 +402,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
ret void
}
-define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
+define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_struct_b_packed(
; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
@@ -421,7 +421,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
ret void
}
-define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
+define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" {
; HSA-LABEL: @kern_implicit_arg_num_bytes(
; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
@@ -440,7 +440,7 @@ define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
ret void
}
-define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
+define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" {
; HSA-LABEL: @kernel_implicitarg_no_struct_align(
; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
@@ -459,7 +459,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a
ret void
}
-define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_lds_ptr(
; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
@@ -478,7 +478,7 @@ define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 {
ret void
}
-define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) #2 {
+define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) nounwind "target-cpu"="tahiti" {
; GCN-LABEL: @kern_lds_ptr_si(
; GCN-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: store i32 0, ptr addrspace(3) [[LDS:%.*]], align 4
@@ -488,7 +488,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) #2 {
ret void
}
-define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_i8(
; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
@@ -520,7 +520,7 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
+define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_i8_i8(
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
@@ -563,7 +563,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
ret void
}
-define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 {
+define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_i8_i8_i8(
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
@@ -617,7 +617,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
ret void
}
-define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_v3i8(
; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0
@@ -649,7 +649,7 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_i16(
; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0
@@ -681,7 +681,7 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i1_i1(
; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0
@@ -713,7 +713,7 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 {
+define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i1_i1_i1(
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0
@@ -756,7 +756,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
ret void
}
-define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 {
+define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i1_i1_i1_i1(
; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0
@@ -810,7 +810,7 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
ret void
}
-define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i1_v3i1(
; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0
@@ -844,7 +844,7 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
+define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i1_i16(
; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0
@@ -876,7 +876,7 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 {
+define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0
@@ -963,7 +963,7 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
ret void
}
-define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
+define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_realign_f16_f16(
; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
@@ -999,7 +999,7 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
ret void
}
-define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_global_ptr(
; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
@@ -1018,7 +1018,7 @@ define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 {
ret void
}
-define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) dereferenceable(42) %ptr) #0 {
+define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) dereferenceable(42) %ptr) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_global_ptr_dereferencable(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
@@ -1037,7 +1037,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref
ret void
}
-define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(1) dereferenceable_or_null(128) %ptr) #0 {
+define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(1) dereferenceable_or_null(128) %ptr) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
@@ -1056,7 +1056,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(
ret void
}
-define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr) #0 {
+define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_nonnull_global_ptr(
; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
@@ -1075,7 +1075,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr
ret void
}
-define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %ptr) #0 {
+define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %ptr) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @kern_align32_global_ptr(
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
@@ -1094,7 +1094,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %
ret void
}
-define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr) #0 {
+define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr) nounwind "target-cpu"="kaveri" {
; GCN-LABEL: @kern_noalias_global_ptr(
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR:%.*]], ptr addrspace(1) undef, align 8
@@ -1104,7 +1104,7 @@ define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr
ret void
}
-define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %ptr0, ptr addrspace(1) noalias %ptr1) #0 {
+define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %ptr0, ptr addrspace(1) noalias %ptr1) nounwind "target-cpu"="kaveri" {
; GCN-LABEL: @kern_noalias_global_ptr_x2(
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR0:%.*]], ptr addrspace(1) undef, align 8
@@ -1116,7 +1116,7 @@ define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %
ret void
}
-define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
+define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @struct_i8_i8_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1147,7 +1147,7 @@ entry:
ret void
}
-define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
+define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @struct_i8_i16_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1178,7 +1178,7 @@ entry:
ret void
}
-define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
+define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @array_2xi8_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1209,7 +1209,7 @@ entry:
ret void
}
-define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
+define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @array_2xi1_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1240,7 +1240,7 @@ entry:
ret void
}
-define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
+define amdgpu_kernel void @only_empty_struct({} %empty) nounwind "target-cpu"="kaveri" {
; GCN-LABEL: @only_empty_struct(
; GCN-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: ret void
@@ -1248,7 +1248,7 @@ define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
ret void
}
-define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
+define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) nounwind "target-cpu"="kaveri" {
; HSA-LABEL: @empty_struct_with_other(
; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
@@ -1690,10 +1690,6 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
ret void
}
-attributes #0 = { nounwind "target-cpu"="kaveri" }
-attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
-attributes #2 = { nounwind "target-cpu"="tahiti" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
index e9d42dc70cbb9f..ebd2ec80d93958 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
@@ -102,6 +102,4 @@ define amdgpu_kernel void @memset_size_8(ptr addrspace(1) %dst, i8 %val) {
ret void
}
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn writeonly }
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) argmemonly nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index 0f4e790a6976f3..1645b946b2402f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -6,31 +6,31 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=0 %s | FileCheck -check-prefixes=OPT,ALL %s
-declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-
-declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p3.p5.i32(ptr addrspace(3) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p5.p3.i32(ptr addrspace(5) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p0.p1.i64(ptr nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p0.p5.i64(ptr nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p999.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(999) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p999.p1.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p999.p998.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(998) nocapture readonly, i64, i1 immarg) #1
-
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) argmemonly nounwind
+declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) argmemonly nounwind
+
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) argmemonly nounwind
+declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memmove.p3.p5.i32(ptr addrspace(3) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memmove.p5.p3.i32(ptr addrspace(5) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) argmemonly nounwind
+declare void @llvm.memmove.p0.p1.i64(ptr nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p0.p5.i64(ptr nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p1.p999.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(999) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p999.p1.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+declare void @llvm.memmove.p999.p998.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(998) nocapture readonly, i64, i1 immarg) argmemonly nounwind
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) argmemonly nounwind
; Test the upper bound for sizes to leave
-define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @max_size_small_static_memcpy_caller0(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false)
; MAX1024-NEXT: ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1)
}
; Smallest static size which will be expanded
-define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @min_size_large_static_memcpy_caller0(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -77,7 +77,7 @@ define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @max_size_small_static_memmove_caller0(
; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false)
; MAX1024-NEXT: ret void
@@ -115,7 +115,7 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @min_size_large_static_memmove_caller0(
; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]]
; OPT-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 1025, 0
@@ -149,7 +149,7 @@ define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 {
+define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) nounwind {
; MAX1024-LABEL: @max_size_small_static_memset_caller0(
; MAX1024-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 1024, i1 false)
; MAX1024-NEXT: ret void
@@ -170,7 +170,7 @@ define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 {
+define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) nounwind {
; OPT-LABEL: @min_size_large_static_memset_caller0(
; OPT-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]]
; OPT: loadstoreloop:
@@ -187,7 +187,7 @@ define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) nounwind {
; OPT-LABEL: @variable_memcpy_caller0(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -223,7 +223,7 @@ define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr ad
ret void
}
-define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) nounwind {
; OPT-LABEL: @variable_memcpy_caller1(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -259,7 +259,7 @@ define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr ad
ret void
}
-define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n, i64 %m) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n, i64 %m) nounwind {
; OPT-LABEL: @memcpy_multi_use_one_function(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -324,7 +324,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
ret void
}
-define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_alt_type(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 3
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 7
@@ -361,7 +361,7 @@ define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(
}
; One of the uses in the function should be expanded, the other left alone.
-define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n) nounwind {
; MAX1024-LABEL: @memcpy_multi_use_one_function_keep_small(
; MAX1024-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; MAX1024-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -449,7 +449,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1028(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -472,7 +472,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1025(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -495,7 +495,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1026(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -518,7 +518,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1032(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -541,7 +541,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1034(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -568,7 +568,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1035(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -599,7 +599,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1036(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -626,7 +626,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1039(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -661,7 +661,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align2_global_align2_1039(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -684,7 +684,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -711,7 +711,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align2_global_align4_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -734,7 +734,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align2_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -757,7 +757,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace
ret void
}
-define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align4_private_align4_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -784,7 +784,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align2_private_align4_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -807,7 +807,7 @@ define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align1_private_align4_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -834,7 +834,7 @@ define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align4_private_align2_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -857,7 +857,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align4_private_align1_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -884,7 +884,7 @@ define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 {
+define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) nounwind {
; OPT-LABEL: @memcpy_private_align2_private_align2_1027(
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
@@ -907,7 +907,7 @@ define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) nounwind {
; OPT-LABEL: @memcpy_global_align4_global_align4_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -943,7 +943,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrs
ret void
}
-define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) nounwind {
; OPT-LABEL: @memcpy_global_align2_global_align2_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 1
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
@@ -979,7 +979,7 @@ define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(ptr addrs
ret void
}
-define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) nounwind {
; OPT-LABEL: @memcpy_global_align1_global_align1_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[N:%.*]], 4
; OPT-NEXT: [[TMP2:%.*]] = and i64 [[N]], 15
@@ -1015,7 +1015,7 @@ define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrs
ret void
}
-define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_local_align4_local_align4_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 3
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 7
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_local_align2_local_align2_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 1
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 1
@@ -1087,7 +1087,7 @@ define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_local_align1_local_align1_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 3
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 7
@@ -1123,7 +1123,7 @@ define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspa
ret void
}
-define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_local_align4_global_align4_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 3
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 7
@@ -1159,7 +1159,7 @@ define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrsp
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) nounwind {
; OPT-LABEL: @memcpy_global_align4_local_align4_variable(
; OPT-NEXT: [[TMP1:%.*]] = lshr i32 [[N:%.*]], 3
; OPT-NEXT: [[TMP2:%.*]] = and i32 [[N]], 7
@@ -1195,7 +1195,7 @@ define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrsp
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_16(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 16, i1 false)
; MAX1024-NEXT: ret void
@@ -1218,7 +1218,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_12(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 12, i1 false)
; MAX1024-NEXT: ret void
@@ -1238,7 +1238,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_8(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 8, i1 false)
; MAX1024-NEXT: ret void
@@ -1254,7 +1254,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_10(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 10, i1 false)
; MAX1024-NEXT: ret void
@@ -1274,7 +1274,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_4(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 4, i1 false)
; MAX1024-NEXT: ret void
@@ -1290,7 +1290,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_2(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 2, i1 false)
; MAX1024-NEXT: ret void
@@ -1306,7 +1306,7 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_1(
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 1, i1 false)
; MAX1024-NEXT: ret void
@@ -1774,6 +1774,3 @@ entry:
}
declare i64 @llvm.umin.i64(i64, i64)
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-check-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-check-metadata.ll
index d96805656f72cf..da6d83550dcc1f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-check-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-check-metadata.ll
@@ -8,7 +8,7 @@ target triple = "amdgcn-amd-amdhsa"
@global_barrier_state = hidden addrspace(3) global i32 undef, align 4
-define i32 @rw() #0 {
+define i32 @rw() noinline {
entry:
%0 = atomicrmw add ptr addrspace(3) @global_barrier_state, i32 1 acq_rel, align 4
ret i32 %0
@@ -19,5 +19,3 @@ entry:
%0 = call i32 @rw()
ret void
}
-
-attributes #0 = { noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
index ed7bd22eea4b10..3bf7a15525b13a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
; CHECK-NOT: v0
; CHECK: {{flat|buffer}}_store_dword {{.*}}v0
-define amdgpu_kernel void @test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) norecurse nounwind "amdgpu-flat-work-group-size"="1,256" {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 1023
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
; CHECK-NOT: v_and_b32
; CHECK: {{flat|buffer}}_store_dword {{.*}}v0
-define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(ptr addrspace(1) nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(ptr addrspace(1) nocapture %out) norecurse nounwind "amdgpu-flat-work-group-size"="1,256" {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 511
@@ -28,7 +28,7 @@ entry:
; CHECK-NOT: v0
; CHECK-NOT: v_and_b32
; CHECK: {{flat|buffer}}_store_dword {{.*}}v0
-define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(ptr addrspace(1) nocapture %out) #0 {
+define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(ptr addrspace(1) nocapture %out) norecurse nounwind "amdgpu-flat-work-group-size"="1,256" {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
%and = and i32 %id, 255
@@ -37,10 +37,7 @@ entry:
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { norecurse nounwind "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
!0 = !{i32 0, i32 1024}
!1 = !{i32 0, i32 1023}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 9547f08d3eba6b..aae117ba17759c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -204,11 +204,11 @@ define amdgpu_kernel void @caller() {
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX12-GISEL-NEXT: s_endpgm
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
- call void @callee(i32 %idx) #0
+ call void @callee(i32 %idx) nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
-declare void @callee(i32) #0
+declare void @callee(i32) nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
define void @workgroup_ids_device_func(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
; GFX9-LABEL: workgroup_ids_device_func:
@@ -290,7 +290,5 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
declare i32 @llvm.amdgcn.workgroup.id.y()
declare i32 @llvm.amdgcn.workgroup.id.z()
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-
-attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX9ARCH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
index 994ef22539a65f..14cedf97645894 100644
--- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_lshr_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_lshr_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) nounwind {
; GFX9-LABEL: s_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -76,7 +76,7 @@ define amdgpu_kernel void @s_lshr_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <
ret void
}
-define amdgpu_kernel void @v_lshr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_lshr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -164,7 +164,7 @@ define amdgpu_kernel void @v_lshr_v2i16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @lshr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
; GFX9-LABEL: lshr_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -257,7 +257,7 @@ define amdgpu_kernel void @lshr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @lshr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
; GFX9-LABEL: lshr_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -350,7 +350,7 @@ define amdgpu_kernel void @lshr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1
ret void
}
-define amdgpu_kernel void @lshr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @lshr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: lshr_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -435,7 +435,7 @@ define amdgpu_kernel void @lshr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: lshr_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -516,7 +516,7 @@ define amdgpu_kernel void @lshr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @v_lshr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_lshr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_lshr_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -617,7 +617,7 @@ define amdgpu_kernel void @v_lshr_v4i16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: lshr_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -706,7 +706,4 @@ define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 6672568b98a203..43635f8f4a6dbb 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -11,31 +11,31 @@ target triple = "amdgcn-amd-amdhsa"
@kernel_round1.collisionsNum = external hidden addrspace(3) global i32, align 4
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
-declare hidden i64 @_Z13get_global_idj(i32 noundef) local_unnamed_addr #0
+declare hidden i64 @_Z13get_global_idj(i32 noundef) local_unnamed_addr convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent nounwind
-declare hidden i32 @_Z10atomic_addPU3AS1Vjj(ptr addrspace(1) noundef, i32 noundef) local_unnamed_addr #1
+declare hidden i32 @_Z10atomic_addPU3AS1Vjj(ptr addrspace(1) noundef, i32 noundef) local_unnamed_addr convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent nounwind
-declare hidden i32 @_Z10atomic_subPU3AS1Vjj(ptr addrspace(1) noundef, i32 noundef) local_unnamed_addr #1
+declare hidden i32 @_Z10atomic_subPU3AS1Vjj(ptr addrspace(1) noundef, i32 noundef) local_unnamed_addr convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
-declare hidden i64 @_Z12get_local_idj(i32 noundef) local_unnamed_addr #0
+declare hidden i64 @_Z12get_local_idj(i32 noundef) local_unnamed_addr convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent nounwind
-declare hidden void @_Z7barrierj(i32 noundef) local_unnamed_addr #1
+declare hidden void @_Z7barrierj(i32 noundef) local_unnamed_addr convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
-declare hidden i32 @_Z3minjj(i32 noundef, i32 noundef) local_unnamed_addr #0
+declare hidden i32 @_Z3minjj(i32 noundef, i32 noundef) local_unnamed_addr convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent nounwind
-declare hidden i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef) local_unnamed_addr #1
+declare hidden i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef) local_unnamed_addr convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
-declare hidden i64 @_Z14get_local_sizej(i32 noundef) local_unnamed_addr #0
+declare hidden i64 @_Z14get_local_sizej(i32 noundef) local_unnamed_addr convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
; Function Attrs: convergent norecurse nounwind
-define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture noundef readonly align 1 %0, ptr addrspace(1) nocapture noundef writeonly align 1 %1, ptr addrspace(1) nocapture noundef readonly align 4 %2, ptr addrspace(1) noundef align 4 %3, ptr addrspace(1) nocapture noundef readnone align 4 %4) local_unnamed_addr #2 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 {
+define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture noundef readonly align 1 %0, ptr addrspace(1) nocapture noundef writeonly align 1 %1, ptr addrspace(1) nocapture noundef readonly align 4 %2, ptr addrspace(1) noundef align 4 %3, ptr addrspace(1) nocapture noundef readnone align 4 %4) local_unnamed_addr convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size"="true" !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 {
; CHECK-LABEL: kernel_round1:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_add_u32 s10, s10, s15
@@ -499,14 +499,14 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_branch .LBB0_27
; CHECK-NEXT: .LBB0_33:
; CHECK-NEXT: s_endpgm
- %6 = tail call i64 @_Z13get_global_idj(i32 noundef 0) #4
+ %6 = tail call i64 @_Z13get_global_idj(i32 noundef 0) convergent nounwind willreturn memory(none)
%7 = trunc i64 %6 to i32
- %8 = tail call i64 @_Z12get_local_idj(i32 noundef 0) #4
+ %8 = tail call i64 @_Z12get_local_idj(i32 noundef 0) convergent nounwind willreturn memory(none)
%9 = trunc i64 %8 to i32
%10 = mul i32 %9, 14
%11 = getelementptr inbounds i8, ptr addrspace(3) @kernel_round1.first_words_data, i32 %10
store i32 0, ptr addrspace(3) @kernel_round1.collisionsNum, align 4, !tbaa !11
- tail call void @_Z7barrierj(i32 noundef 1) #5
+ tail call void @_Z7barrierj(i32 noundef 1) convergent nounwind
%12 = lshr i64 %6, 3
%13 = shl i32 %7, 2
%14 = and i32 %13, 28
@@ -515,7 +515,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
%17 = load i32, ptr addrspace(1) %16, align 4, !tbaa !11
%18 = lshr i32 %17, %14
%19 = and i32 %18, 15
- %20 = tail call i32 @_Z3minjj(i32 noundef %19, i32 noundef 12) #4
+ %20 = tail call i32 @_Z3minjj(i32 noundef %19, i32 noundef 12) convergent nounwind willreturn memory(none)
%21 = icmp eq i32 %20, 0
br i1 %21, label %119, label %27
@@ -575,7 +575,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
61: ; preds = %53
%62 = add i32 %56, 1
- %63 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %63 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%64 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %63
store i32 %57, ptr addrspace(3) %64, align 4, !tbaa !11
br label %65
@@ -591,7 +591,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
71: ; preds = %65
%72 = add i32 %57, 1
%73 = add i32 %66, 1
- %74 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %74 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%75 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %74
store i32 %72, ptr addrspace(3) %75, align 4, !tbaa !11
br label %76
@@ -607,7 +607,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
82: ; preds = %76
%83 = add i32 %57, 2
%84 = add i32 %77, 1
- %85 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %85 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%86 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %85
store i32 %83, ptr addrspace(3) %86, align 4, !tbaa !11
br label %87
@@ -623,7 +623,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
93: ; preds = %87
%94 = add i32 %57, 3
%95 = add i32 %88, 1
- %96 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %96 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%97 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %96
store i32 %94, ptr addrspace(3) %97, align 4, !tbaa !11
br label %98
@@ -646,7 +646,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
110: ; preds = %103
%111 = add i32 %105, 1
- %112 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %112 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%113 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %112
store i32 %106, ptr addrspace(3) %113, align 4, !tbaa !11
br label %114
@@ -659,7 +659,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
br i1 %118, label %103, label %32
119: ; preds = %32, %22, %5
- tail call void @_Z7barrierj(i32 noundef 1) #5
+ tail call void @_Z7barrierj(i32 noundef 1) convergent nounwind
%120 = load i32, ptr addrspace(3) @kernel_round1.collisionsNum, align 4, !tbaa !11
%121 = icmp ugt i32 %120, %9
br i1 %121, label %122, label %206
@@ -722,14 +722,14 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
%173 = and i32 %172, 28
%174 = getelementptr inbounds i32, ptr addrspace(1) %3, i64 %171
%175 = shl nuw nsw i32 1, %173
- %176 = tail call i32 @_Z10atomic_addPU3AS1Vjj(ptr addrspace(1) noundef %174, i32 noundef %175) #5
+ %176 = tail call i32 @_Z10atomic_addPU3AS1Vjj(ptr addrspace(1) noundef %174, i32 noundef %175) convergent nounwind
%177 = lshr i32 %176, %173
%178 = and i32 %177, 15
%179 = icmp ugt i32 %178, 11
br i1 %179, label %180, label %182
180: ; preds = %154
- %181 = tail call i32 @_Z10atomic_subPU3AS1Vjj(ptr addrspace(1) noundef %174, i32 noundef %175) #5
+ %181 = tail call i32 @_Z10atomic_subPU3AS1Vjj(ptr addrspace(1) noundef %174, i32 noundef %175) convergent nounwind
br label %201
182: ; preds = %154
@@ -758,7 +758,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
br label %201
201: ; preds = %182, %180, %124
- %202 = tail call i64 @_Z14get_local_sizej(i32 noundef 0) #4
+ %202 = tail call i64 @_Z14get_local_sizej(i32 noundef 0) convergent nounwind willreturn memory(none)
%203 = add i64 %202, %127
%204 = trunc i64 %203 to i32
%205 = icmp ugt i32 %120, %204
@@ -770,7 +770,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; Removed most of the if-else blocks
-define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapture noundef readonly align 1 %.0, ptr addrspace(1) nocapture noundef writeonly align 1 %.1, ptr addrspace(1) nocapture noundef readonly align 4 %.2, ptr addrspace(1) noundef align 4 %.3, ptr addrspace(1) nocapture noundef readnone align 4 %.4) local_unnamed_addr #2 !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 {
+define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapture noundef readonly align 1 %.0, ptr addrspace(1) nocapture noundef writeonly align 1 %.1, ptr addrspace(1) nocapture noundef readonly align 4 %.2, ptr addrspace(1) noundef align 4 %.3, ptr addrspace(1) nocapture noundef readnone align 4 %.4) local_unnamed_addr convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size"="true" !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 !kernel_arg_name !9 !reqd_work_group_size !10 {
; CHECK-LABEL: kernel_round1_short:
; CHECK: ; %bb.0: ; %.5
; CHECK-NEXT: s_add_u32 s10, s10, s15
@@ -972,14 +972,14 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
; CHECK-NEXT: s_endpgm
.5:
- %.6 = tail call i64 @_Z13get_global_idj(i32 noundef 0) #4
+ %.6 = tail call i64 @_Z13get_global_idj(i32 noundef 0) convergent nounwind willreturn memory(none)
%.7 = trunc i64 %.6 to i32
- %.8 = tail call i64 @_Z12get_local_idj(i32 noundef 0) #4
+ %.8 = tail call i64 @_Z12get_local_idj(i32 noundef 0) convergent nounwind willreturn memory(none)
%.9 = trunc i64 %.8 to i32
%.10 = mul i32 %.9, 14
%.11 = getelementptr inbounds i8, ptr addrspace(3) @kernel_round1.first_words_data, i32 %.10
store i32 0, ptr addrspace(3) @kernel_round1.collisionsNum, align 4, !tbaa !11
- tail call void @_Z7barrierj(i32 noundef 1) #5
+ tail call void @_Z7barrierj(i32 noundef 1) convergent nounwind
%.12 = lshr i64 %.6, 3
%.13 = shl i32 %.7, 2
%.14 = and i32 %.13, 28
@@ -988,7 +988,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
%.17 = load i32, ptr addrspace(1) %.16, align 4, !tbaa !11
%.18 = lshr i32 %.17, %.14
%.19 = and i32 %.18, 15
- %.20 = tail call i32 @_Z3minjj(i32 noundef %.19, i32 noundef 12) #4
+ %.20 = tail call i32 @_Z3minjj(i32 noundef %.19, i32 noundef 12) convergent nounwind willreturn memory(none)
%.21 = icmp eq i32 %.20, 0
%.23 = add i32 %.20, -1
%.24 = icmp eq i32 %.23, 0
@@ -1051,7 +1051,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
.110: ; preds = %.103
%.111 = add i32 %.105, 1
- %.112 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) #5
+ %.112 = tail call i32 @_Z10atomic_incPU3AS3Vj(ptr addrspace(3) noundef @kernel_round1.collisionsNum) convergent nounwind
%.113 = getelementptr inbounds i32, ptr addrspace(3) @kernel_round1.collisionsData, i32 %.112
store i32 %.106, ptr addrspace(3) %.113, align 4, !tbaa !11
br label %.114
@@ -1064,7 +1064,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
br i1 %.118, label %.103, label %.32
.119: ; preds = %.32, %.22, %.5
- tail call void @_Z7barrierj(i32 noundef 1) #5
+ tail call void @_Z7barrierj(i32 noundef 1) convergent nounwind
%.120 = load i32, ptr addrspace(3) @kernel_round1.collisionsNum, align 4, !tbaa !11
%.121 = icmp ugt i32 %.120, %.9
br label %.206
@@ -1074,14 +1074,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.fshl.i64(i64, i64, i64) #3
-
-attributes #0 = { convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
-attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
-attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="64,64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" "uniform-work-group-size"="true" }
-attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #4 = { convergent nounwind willreturn memory(none) }
-attributes #5 = { convergent nounwind }
+declare i64 @llvm.fshl.i64(i64, i64, i64) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!llvm.module.flags = !{!0, !1, !2}
!opencl.ocl.version = !{!3}
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index e94aa4b8ce3d1c..6a999ba05cbe3b 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -10,10 +10,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
@@ -33,8 +33,8 @@ declare float @llvm.fmuladd.f32(float, float, float) #0
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
-define amdgpu_kernel void @combine_to_mad_f32_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_f32_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -72,8 +72,8 @@ define amdgpu_kernel void @combine_to_mad_f32_0(ptr addrspace(1) noalias %out, p
; SI-STD-DAG: buffer_store_dword [[C]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @combine_to_mad_f32_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_f32_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -109,8 +109,8 @@ define amdgpu_kernel void @combine_to_mad_f32_0_2use(ptr addrspace(1) noalias %o
; SI-DENORM: buffer_store_dword [[RESULT]]
; SI-STD: buffer_store_dword [[C]]
-define amdgpu_kernel void @combine_to_mad_f32_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_f32_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -139,8 +139,8 @@ define amdgpu_kernel void @combine_to_mad_f32_1(ptr addrspace(1) noalias %out, p
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -176,8 +176,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %o
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -211,8 +211,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_0_f32_2use(ptr addrspace(1) noali
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -248,8 +248,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %o
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -284,8 +284,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_1_f32_2use(ptr addrspace(1) noali
; SI-DENORM-SLOWFMAF: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -323,8 +323,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %o
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -367,8 +367,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_neg(ptr addrspace(1)
; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -412,8 +412,8 @@ define amdgpu_kernel void @combine_to_mad_fsub_2_f32_2uses_mul(ptr addrspace(1)
; SI-DENORM: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[C]]
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -428,7 +428,7 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1)
%v = load volatile float, ptr addrspace(1) %gep.4
%tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
+ %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) nounwind readnone
%tmp2 = fsub float %tmp1, %z
store float %tmp2, ptr addrspace(1) %gep.out
@@ -455,8 +455,8 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_0_f32(ptr addrspace(1)
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -471,7 +471,7 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(ptr addrspace(1)
%v = load volatile float, ptr addrspace(1) %gep.4
%tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
+ %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) nounwind readnone
%tmp2 = fsub float %x, %tmp1
store float %tmp2, ptr addrspace(1) %gep.out
@@ -505,8 +505,8 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_1_f32(ptr addrspace(1)
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -521,7 +521,7 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1)
%v = load volatile float, ptr addrspace(1) %gep.4
%tmp0 = fmul float %u, %v
- %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
+ %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) nounwind readnone
%tmp2 = fsub float %tmp1, %z
store float %tmp2, ptr addrspace(1) %gep.out
@@ -556,8 +556,8 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_2_f32(ptr addrspace(1)
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: s_endpgm
-define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
%gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
@@ -573,12 +573,9 @@ define amdgpu_kernel void @aggressive_combine_to_mad_fsub_3_f32(ptr addrspace(1)
; nsz flag is needed since this combine may change sign of zero
%tmp0 = fmul nsz float %u, %v
- %tmp1 = call nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
+ %tmp1 = call nsz float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) nounwind readnone
%tmp2 = fsub nsz float %x, %tmp1
store float %tmp2, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index 819b6ca98b3a83..db9cce43724319 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -9,7 +9,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-CI %s
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,7 +58,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %s
ret <2 x half> %vec.result
}
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -115,7 +115,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %s
ret <2 x half> %vec.result
}
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -171,7 +171,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src
ret <2 x half> %vec.result
}
-define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
+define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -243,7 +243,7 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, ha
ret i32 %shr
}
-define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
+define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; SDAG-GFX11: ; %bb.0:
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -328,7 +328,7 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src
ret i32 %shr
}
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -383,7 +383,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %
ret <2 x half> %vec.result
}
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -442,7 +442,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half
ret <2 x half> %vec.result
}
-define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
+define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX11-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -519,12 +519,9 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi
ret <2 x half> %vec.result
}
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone speculatable }
+declare half @llvm.minnum.f16(half, half) nounwind readnone speculatable
+declare half @llvm.maxnum.f16(half, half) nounwind readnone speculatable
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.maxnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 5b7f0e72b70da5..6e64de47927a38 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -11,7 +11,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-CI %s
-define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
+define half @mixlo_simple(float %src0, float %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: mixlo_simple:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -103,7 +103,7 @@ define half @mixlo_simpl_no_flush(float %src0, float %src1, float %src2) {
ret half %cvt.result
}
-define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
+define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo_no_flush(half %src0, half %src1,
ret half %cvt.result
}
-define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
+define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -267,7 +267,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2
ret half %cvt.result
}
-define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
+define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -328,7 +328,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr
ret half %clamp
}
-define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
+define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -388,7 +388,7 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src
; FIXME(DAG): Should be able to avoid extra register because first
; operation only clobbers relevant lane.
-define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v2f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -491,7 +491,7 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half
ret <2 x half> %cvt.result
}
-define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
+define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v3f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -639,7 +639,7 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half
ret <3 x half> %cvt.result
}
-define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
+define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v4f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -825,7 +825,7 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half
; FIXME (DAG): Fold clamp
-define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -947,7 +947,7 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s
; FIXME (DAG): Should be packed into 2 registers per argument?
; FIXME (GIsel): V_PK_MAX clamp could be folded into mixlo
-define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
+define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1152,7 +1152,7 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
ret <3 x half> %clamp
}
-define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
+define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1396,7 +1396,7 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s
; FIXME (GISel): Packed Vectors handling isn't great for now, so we don't end up with
; a build_vector to select the mixhi. Issue is more specifically with how insert_vector_elt is being
; legalized (bitwise ops instead of shuffle/build_vector for instance).
-define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1552,7 +1552,7 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half>
ret <2 x half> %insert
}
-define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1715,7 +1715,7 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half>
; FIXME (DAG): Should be able to use mixlo/mixhi
-define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1861,7 +1861,7 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr
; FIXME (DAG): Handling undef 4th component
-define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
+define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v3f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2041,7 +2041,7 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr
ret <3 x half> %cvt.result
}
-define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
+define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v4f32_clamp_precvt:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2267,7 +2267,7 @@ define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %sr
ret <4 x half> %cvt.result
}
-define i32 @mixlo_zext(float %src0, float %src1, float %src2) #0 {
+define i32 @mixlo_zext(float %src0, float %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: mixlo_zext:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2317,7 +2317,7 @@ define i32 @mixlo_zext(float %src0, float %src1, float %src2) #0 {
ret i32 %cvt.result.i32
}
-define half @mixlo_fptrunc(float %a, float %b) #0 {
+define half @mixlo_fptrunc(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: mixlo_fptrunc:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2410,7 +2410,7 @@ define half @mixlo_fptrunc_no_flush(float %a, float %b) {
ret half %trunc
}
-define half @mixlo_fptrunc_abs_src_mod(float %a, float %b) #0 {
+define half @mixlo_fptrunc_abs_src_mod(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: mixlo_fptrunc_abs_src_mod:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2457,7 +2457,7 @@ define half @mixlo_fptrunc_abs_src_mod(float %a, float %b) #0 {
ret half %trunc
}
-define half @mixlo_fptrunc_neg_src_mod(float %a, float %b) #0 {
+define half @mixlo_fptrunc_neg_src_mod(float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: mixlo_fptrunc_neg_src_mod:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2504,32 +2504,29 @@ define half @mixlo_fptrunc_neg_src_mod(float %a, float %b) #0 {
ret half %trunc
}
-declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable
-declare half @llvm.minnum.f16(half, half) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
-declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1
+declare half @llvm.minnum.f16(half, half) nounwind readnone speculatable
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) nounwind readnone speculatable
+declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) nounwind readnone speculatable
+declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) nounwind readnone speculatable
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
-declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1
+declare half @llvm.maxnum.f16(half, half) nounwind readnone speculatable
+declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) nounwind readnone speculatable
+declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) nounwind readnone speculatable
+declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) nounwind readnone speculatable
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) nounwind readnone speculatable
+declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) nounwind readnone speculatable
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) nounwind readnone speculatable
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
+declare float @llvm.maxnum.f32(float, float) nounwind readnone speculatable
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) nounwind readnone speculatable
+declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) nounwind readnone speculatable
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) nounwind readnone speculatable
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone speculatable }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone speculatable
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone speculatable
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index b520dd1060ec8c..b567d8552deaad 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -13,7 +13,7 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
-define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -71,7 +71,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2
ret float %result
}
-define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
+define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -135,7 +135,7 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src
ret float %result
}
-define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -196,7 +196,7 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
ret float %result
}
-define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v2f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,7 +329,7 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
ret <2 x float> %result
}
-define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -427,7 +427,7 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
ret <2 x float> %result
}
-define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -504,7 +504,7 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
ret float %result
}
-define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -563,7 +563,7 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s
ret float %result
}
-define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -623,7 +623,7 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -677,7 +677,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2)
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -732,7 +732,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -787,7 +787,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -847,7 +847,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float
; f16 inline immediate that may be converted to f32, not an actual f32
; inline immediate.
-define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -927,7 +927,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1014,7 +1014,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0
; fpext f16 1/2pi = 0x3e230000
; f32 1/2pi = 0x3e22f983
-define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1115,7 +1115,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
}
-define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1215,7 +1215,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
ret float %result
}
-define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
+define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1347,7 +1347,7 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
ret <2 x float> %result
}
-define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
+define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1486,7 +1486,7 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
ret <2 x float> %result
}
-define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
+define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-GFX1100: ; %bb.0:
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1621,7 +1621,7 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
ret <2 x float> %result
}
-define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
+define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1684,7 +1684,7 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
ret float %clamp
}
-define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
+define float @no_mix_simple(float %src0, float %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: no_mix_simple:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1724,7 +1724,7 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
ret float %result
}
-define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
+define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: no_mix_simple_fabs:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1769,7 +1769,7 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
; All sources are converted from f16, so it doesn't matter
; v_mad_mix_f32 flushes.
-define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
+define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="ieee,ieee" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1831,7 +1831,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="ieee,ieee" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1888,7 +1888,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
+define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="ieee,ieee" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1964,7 +1964,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0,
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="ieee,ieee" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2033,7 +2033,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2092,7 +2092,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal
ret float %result
}
-define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
+define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2147,7 +2147,7 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src
ret float %result
}
-define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2230,7 +2230,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; Make sure we don't fold pre-cvt fneg if we already have a fabs
-define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2302,7 +2302,7 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %
ret float %result
}
-define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2366,7 +2366,7 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1
ret float %result
}
-define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2451,7 +2451,7 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
ret float %result
}
-define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2536,7 +2536,7 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
ret float %result
}
-define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
+define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GFX1100: ; %bb.0:
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2622,14 +2622,10 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
ret float %result
}
-declare half @llvm.fabs.f16(half) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.minnum.f32(float, float) #2
-declare float @llvm.maxnum.f32(float, float) #2
-declare float @llvm.fmuladd.f32(float, float, float) #2
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
-attributes #2 = { nounwind readnone speculatable }
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nounwind readnone speculatable
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable
+declare float @llvm.minnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.maxnum.f32(float, float) nounwind readnone speculatable
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll b/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
index 620566d3baff38..33e1035e20a80d 100644
--- a/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
@@ -3,15 +3,15 @@
; If the workgroup id range is restricted, we should be able to use
; mad24 for the usual indexing pattern.
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone
; GCN-LABEL: {{^}}get_global_id_0:
; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
; GCN: s_mul_i32 [[MUL:s[0-9]+]], s8, [[WGSIZEX]]
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, [[MUL]], v0
-define amdgpu_kernel void @get_global_id_0(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @get_global_id_0(ptr addrspace(1) %out) nounwind {
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep = getelementptr inbounds i32, ptr addrspace(4) %dispatch.ptr, i64 1
%workgroup.size.xy = load i32, ptr addrspace(4) %gep, align 4, !invariant.load !0
@@ -27,9 +27,6 @@ define amdgpu_kernel void @get_global_id_0(ptr addrspace(1) %out) #1 {
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!0 = !{}
!1 = !{i32 0, i32 1024}
!2 = !{i32 0, i32 16777216}
diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
index 14bcc4f994f890..e11ab1f7b61278 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
@@ -8,7 +8,7 @@
; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok.
-define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_sextops:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -60,7 +60,7 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_sextops_commute:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -112,7 +112,7 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_u64_u32_zextops:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -164,7 +164,7 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_u64_u32_zextops_commute:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
+define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_sextops_i32_i128:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -402,7 +402,7 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
ret i128 %mad
}
-define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
+define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_sextops_i32_i63:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -454,7 +454,7 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
ret i63 %mad
}
-define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
+define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_sextops_i31_i63:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -521,7 +521,7 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
ret i63 %mad
}
-define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_extops_i32_i64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -596,7 +596,7 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
+define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_u64_u32_bitops:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -648,7 +648,7 @@ define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
ret i64 %add
}
-define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
+define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_u64_u32_bitops_lhs_mask_small:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -722,7 +722,7 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
ret i64 %add
}
-define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
+define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_u64_u32_bitops_rhs_mask_small:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -797,7 +797,7 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
ret i64 %add
}
-define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
+define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_bitops:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -852,7 +852,7 @@ define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
}
; Example from bug report
-define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 {
+define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) nounwind {
; CI-LABEL: mad_i64_i32_unpack_i64ops:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -904,7 +904,7 @@ define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 {
ret i64 %mad
}
-define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_uniform:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -998,7 +998,7 @@ define amdgpu_kernel void @mad_i64_i32_uniform(ptr addrspace(1) %out, i32 %arg0,
ret void
}
-define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 {
+define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) nounwind {
; CI-LABEL: mad_i64_i32_twice:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1072,7 +1072,7 @@ define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 {
ret i64 %out
}
-define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %arg4) #0 {
+define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %arg4) nounwind {
; CI-LABEL: mad_i64_i32_thrice:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1189,7 +1189,7 @@ define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %
ret i64 %out
}
-define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) nounwind {
; CI-LABEL: mad_i64_i32_secondary_use:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1267,7 +1267,7 @@ define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %out
}
-define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 {
+define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) nounwind {
; CI-LABEL: mad_i48_i48:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1335,6 +1335,3 @@ define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 {
%a = add i48 %m, %arg2
ret i48 %a
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index a71b083d22e2bd..da00d4cfbb3d4e 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -258,7 +258,7 @@ entry:
; GCN: v_mad_u32_u24
; GCN: v_mad_u32_u24
; GCN: v_mad_u32_u24
-define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) #0 {
+define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) norecurse nounwind {
bb:
%tmp = and i32 %arg4, 16777215
%tmp9 = extractelement <4 x i32> %arg1, i64 1
@@ -316,5 +316,3 @@ bb19: ; preds = %bb19, %bb
%tmp55 = icmp eq i32 %tmp54, %arg6
br i1 %tmp55, label %bb18, label %bb19
}
-
-attributes #0 = { norecurse nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 9ec37a5e14cdf9..994aee61662f41 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -12,7 +12,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
-define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
+define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: madak_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -162,7 +162,7 @@ define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspac
; Make sure this is only folded with one use. This is a code size
; optimization and if we fold the immediate multiple times, we'll undo
; it.
-define amdgpu_kernel void @madak_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @madak_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: madak_2_use_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -362,7 +362,7 @@ define amdgpu_kernel void @madak_2_use_f32(ptr addrspace(1) noalias %out, ptr ad
ret void
}
-define amdgpu_kernel void @madak_m_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a) #0 {
+define amdgpu_kernel void @madak_m_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: madak_m_inline_imm_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -481,7 +481,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(ptr addrspace(1) noalias %out,
; Make sure nothing weird happens with a value that is also allowed as
; an inline immediate.
-define amdgpu_kernel void @madak_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
+define amdgpu_kernel void @madak_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: madak_inline_imm_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -629,7 +629,7 @@ define amdgpu_kernel void @madak_inline_imm_f32(ptr addrspace(1) noalias %out, p
}
; We can't use an SGPR when forming madak
-define amdgpu_kernel void @s_v_madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, float %b) #0 {
+define amdgpu_kernel void @s_v_madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: s_v_madak_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -760,7 +760,7 @@ define amdgpu_kernel void @s_v_madak_f32(ptr addrspace(1) noalias %out, ptr addr
ret void
}
-define amdgpu_kernel void @v_s_madak_f32(ptr addrspace(1) noalias %out, float %a, ptr addrspace(1) noalias %in.b) #0 {
+define amdgpu_kernel void @v_s_madak_f32(ptr addrspace(1) noalias %out, float %a, ptr addrspace(1) noalias %in.b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_s_madak_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -902,7 +902,7 @@ define amdgpu_kernel void @v_s_madak_f32(ptr addrspace(1) noalias %out, float %a
ret void
}
-define amdgpu_kernel void @s_s_madak_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @s_s_madak_f32(ptr addrspace(1) %out, float %a, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: s_s_madak_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1000,7 +1000,7 @@ define amdgpu_kernel void @s_s_madak_f32(ptr addrspace(1) %out, float %a, float
ret void
}
-define amdgpu_kernel void @no_madak_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: no_madak_src0_modifier_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1153,7 +1153,7 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(ptr addrspace(1) noalias %
ret void
}
-define amdgpu_kernel void @no_madak_src1_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: no_madak_src1_modifier_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1309,7 +1309,7 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(ptr addrspace(1) noalias %
; SIFoldOperands should not fold the SGPR copy into the instruction before GFX10
; because the implicit immediate already uses the constant bus.
; On GFX10+ we can use two scalar operands.
-define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, [8 x i32], float %sgpr0, float %sgpr1) #0 {
+define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, [8 x i32], float %sgpr0, float %sgpr1) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: madak_constant_bus_violation:
; GFX6: ; %bb.0: ; %bb
; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9
@@ -1512,5 +1512,3 @@ bb4:
store volatile float %tmp2, ptr addrspace(1) undef, align 4
ret void
}
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index d5ef866dc8a85e..bf71f026a0b190 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
-define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -34,7 +34,7 @@ define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspac
; GCN-DAG: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
; GCN-DAG: v_mac_f32_e32 [[VC]], 0x41200000, [[VA]]
; GCN: s_endpgm
-define amdgpu_kernel void @madmk_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @madmk_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
@@ -63,7 +63,7 @@ define amdgpu_kernel void @madmk_2_use_f32(ptr addrspace(1) noalias %out, ptr ad
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define amdgpu_kernel void @madmk_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @madmk_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -82,7 +82,7 @@ define amdgpu_kernel void @madmk_inline_imm_f32(ptr addrspace(1) noalias %out, p
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
+define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32], float %a, [8 x i32], float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -98,7 +98,7 @@ define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32
; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
; GCN: v_madmk_f32 {{v[0-9]+}}, [[VREG1]], 0x41200000, [[VREG2]]
; GCN: s_endpgm
-define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 {
+define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -114,7 +114,7 @@ define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addr
; GCN-NOT: v_madmk_f32
; GCN: v_mac_f32_e32
; GCN: s_endpgm
-define amdgpu_kernel void @scalar_vector_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %a) #0 {
+define amdgpu_kernel void @scalar_vector_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -131,7 +131,7 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(ptr addrspace(1) noalias %out
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]]
-define amdgpu_kernel void @no_madmk_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -152,7 +152,7 @@ define amdgpu_kernel void @no_madmk_src0_modifier_f32(ptr addrspace(1) noalias %
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -174,7 +174,7 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %
; GCN: v_mov_b32_e32 [[B:v[0-9]+]], 2.0
; GCN: v_madmk_f32 {{v[0-9]+}}, [[A]], 0x41200000, [[B]]
-define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -191,7 +191,7 @@ define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %ou
; SI: s_or_b64
; SI: s_xor_b64
; SI: v_mac_f32_e32 {{v[0-9]+}}, 0x472aee8c, {{v[0-9]+}}
-define amdgpu_kernel void @kill_madmk_verifier_error() #0 {
+define amdgpu_kernel void @kill_madmk_verifier_error() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
bb:
br label %bb2
@@ -200,7 +200,7 @@ bb1: ; preds = %bb2
bb2: ; preds = %bb6, %bb
%tmp = phi float [ undef, %bb ], [ %tmp8, %bb6 ]
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%f_tid = bitcast i32 %tid to float
%tmp3 = fsub float %f_tid, %tmp
%tmp5 = fcmp oeq float %tmp3, 1.000000e+04
@@ -212,7 +212,4 @@ bb6: ; preds = %bb2
br label %bb2
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/max-literals.ll b/llvm/test/CodeGen/AMDGPU/max-literals.ll
index f1cbf43aa089a1..5cf56a087c91fb 100644
--- a/llvm/test/CodeGen/AMDGPU/max-literals.ll
+++ b/llvm/test/CodeGen/AMDGPU/max-literals.ll
@@ -59,8 +59,6 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/max-sgprs.ll b/llvm/test/CodeGen/AMDGPU/max-sgprs.ll
index 964b1eda5f35c9..fe25311d96d065 100644
--- a/llvm/test/CodeGen/AMDGPU/max-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/max-sgprs.ll
@@ -2,7 +2,7 @@
; GCN-LABEL: {{^}}max_sgprs_gfx10:
; GCN: NumSgprs: 108
-define amdgpu_kernel void @max_sgprs_gfx10() #0 {
+define amdgpu_kernel void @max_sgprs_gfx10() nounwind "target-cpu"="gfx1010" {
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -21,5 +21,3 @@ define amdgpu_kernel void @max_sgprs_gfx10() #0 {
call void asm sideeffect "", "~{vcc}" ()
ret void
}
-
-attributes #0 = { nounwind "target-cpu"="gfx1010" }
diff --git a/llvm/test/CodeGen/AMDGPU/max3.ll b/llvm/test/CodeGen/AMDGPU/max3.ll
index a757bb068cf8d6..2bfb1c7f1d0922 100644
--- a/llvm/test/CodeGen/AMDGPU/max3.ll
+++ b/llvm/test/CodeGen/AMDGPU/max3.ll
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}v_test_imax3_sgt_i32:
; GCN: v_max3_i32
-define amdgpu_kernel void @v_test_imax3_sgt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umax3_ugt_i32:
; GCN: v_max3_u32
-define amdgpu_kernel void @v_test_umax3_ugt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
@@ -47,7 +47,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i32(ptr addrspace(1) %out, ptr addrs
; VI: v_max_i16
; GFX9: v_max3_i16
-define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid
@@ -71,7 +71,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_max_u16
; GFX9: v_max3_u16
-define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid
@@ -95,7 +95,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_max_i16
; GFX9: v_max3_i16
-define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i8, ptr addrspace(1) %bptr, i32 %tid
@@ -119,7 +119,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_u16
; GFX9: v_max3_u16
-define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i8, ptr addrspace(1) %bptr, i32 %tid
@@ -143,7 +143,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_i16
; GFX9: v_max3_i16
-define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i7, ptr addrspace(1) %bptr, i32 %tid
@@ -167,7 +167,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_u16
; GFX9: v_max3_u16
-define amdgpu_kernel void @v_test_umax3_ugt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i7, ptr addrspace(1) %bptr, i32 %tid
@@ -186,7 +186,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i7(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}v_test_imax3_sgt_i33:
; GCN-NOT: v_max3
-define amdgpu_kernel void @v_test_imax3_sgt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i33, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i33, ptr addrspace(1) %bptr, i32 %tid
@@ -205,7 +205,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i33(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umax3_ugt_i33:
; GCN-NOT: v_max3
-define amdgpu_kernel void @v_test_umax3_ugt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i33, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i33, ptr addrspace(1) %bptr, i32 %tid
@@ -224,7 +224,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i33(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_imax3_sgt_i64:
; GCN-NOT: v_max3
-define amdgpu_kernel void @v_test_imax3_sgt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imax3_sgt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
@@ -243,7 +243,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i64(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umax3_ugt_i64:
; GCN-NOT: v_max3
-define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
@@ -260,7 +260,4 @@ define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrs
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
index 1d0533ca07b3e5..e24f814ed1a6e2 100644
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -4,7 +4,7 @@ target triple = "amdgcn-amd-amdhsa"
@_RSENC_gDcd_______________________________ = external protected addrspace(1) externally_initialized global [4096 x i8], align 16
-define protected amdgpu_kernel void @_RSENC_PRInit__________________________________(i1 %c0) local_unnamed_addr #0 {
+define protected amdgpu_kernel void @_RSENC_PRInit__________________________________(i1 %c0) local_unnamed_addr "uniform-work-group-size"="true" {
entry:
%runtimeVersionCopy = alloca [128 x i8], align 16, addrspace(5)
%licenseVersionCopy = alloca [128 x i8], align 16, addrspace(5)
@@ -150,5 +150,3 @@ if.end570: ; preds = %for.body564, %if.el
cleanup.cont: ; preds = %if.end15, %if.end
ret void
}
-
-attributes #0 = { "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
index dd892ec3d59b39..95cb1993636249 100644
--- a/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
+++ b/llvm/test/CodeGen/AMDGPU/mem-builtins.ll
@@ -1,9 +1,9 @@
; RUN: not llc -mtriple=r600 < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
-declare hidden i32 @memcmp(ptr addrspace(1) readonly nocapture, ptr addrspace(1) readonly nocapture, i64) #0
+declare hidden i32 @memcmp(ptr addrspace(1) readonly nocapture, ptr addrspace(1) readonly nocapture, i64) nounwind
declare hidden ptr addrspace(1) @memchr(ptr addrspace(1) readonly nocapture, i32, i64) #1
-declare hidden ptr @strcpy(ptr nocapture, ptr readonly nocapture) #0
+declare hidden ptr @strcpy(ptr nocapture, ptr readonly nocapture) nounwind
declare hidden i32 @strlen(ptr nocapture) #1
declare hidden i32 @strnlen(ptr nocapture, i32) #1
declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1
@@ -13,7 +13,7 @@ declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
-define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
+define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) nounwind {
entry:
%cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
store volatile i32 %cmp, ptr addrspace(1) undef
@@ -24,7 +24,7 @@ entry:
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
-define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
+define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) nounwind {
%res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
ret void
@@ -34,7 +34,7 @@ define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %le
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
-define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
+define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) nounwind {
%res = call ptr @strcpy(ptr %dst, ptr %src)
store volatile ptr %res, ptr addrspace(1) undef
ret void
@@ -44,7 +44,7 @@ define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
-define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
+define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) nounwind {
%res = call i32 @strcmp(ptr %src0, ptr %src1)
store volatile i32 %res, ptr addrspace(1) undef
ret void
@@ -54,7 +54,7 @@ define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
-define amdgpu_kernel void @test_strlen(ptr %src) #0 {
+define amdgpu_kernel void @test_strlen(ptr %src) nounwind {
%res = call i32 @strlen(ptr %src)
store volatile i32 %res, ptr addrspace(1) undef
ret void
@@ -64,10 +64,8 @@ define amdgpu_kernel void @test_strlen(ptr %src) #0 {
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
-define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
+define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) nounwind {
%res = call i32 @strnlen(ptr %src, i32 %size)
store volatile i32 %res, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 1dd18b4228fe5e..61a08be6488175 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -313,8 +313,5 @@ loop-memcpy-residual.preheader: ; preds = %loop-memcpy-residua
br label %loop-memcpy-residual
}
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
-declare i64 @llvm.umin.i64(i64, i64) #1
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare i64 @llvm.umin.i64(i64, i64) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
index a5e0ceaa6b329b..32bd26f42340c0 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
@@ -50,10 +50,8 @@ define void @memcpy_fixed_align(ptr addrspace(5) %dst, ptr addrspace(1) %src) {
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [40 x i8], addrspace(5)
call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 dereferenceable(40) %alloca, ptr addrspace(1) align 4 dereferenceable(40) %src, i64 40, i1 false)
- call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca) #0
+ call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca) argmemonly nounwind willreturn
ret void
}
-declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
-
-attributes #0 = { argmemonly nounwind willreturn }
+declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 2e6a73bb2cc00f..91d966cee98a89 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -1,13 +1,13 @@
# RUN: llc -mtriple=amdgcn -run-pass=si-memory-legalizer %s -o - | FileCheck %s
--- |
- declare i32 @llvm.amdgcn.workitem.id.x() #0
+ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @atomic_max_i32_noret(
ptr addrspace(1) %out,
ptr addrspace(1) %in,
ptr addrspace(1) %x,
- i32 %y) #1 {
+ i32 %y) nounwind "target-cpu"="gfx803" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%idxprom = sext i32 %tid to i64
%tid.gep = getelementptr ptr addrspace(1), ptr addrspace(1) %in, i64 %idxprom
@@ -33,9 +33,6 @@
declare void @llvm.amdgcn.end.cf(i64)
- attributes #0 = { nounwind readnone }
- attributes #1 = { nounwind "target-cpu"="gfx803" }
-
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
index 18df16988d8e4d..d41d19da3da20e 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
@@ -1,7 +1,7 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s
--- |
- define amdgpu_kernel void @multiple_mem_operands(ptr addrspace(1) %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
+ define amdgpu_kernel void @multiple_mem_operands(ptr addrspace(1) %out, i32 %cond, i32 %if_offset, i32 %else_offset) "target-cpu"="gfx803" {
entry:
%scratch0 = alloca [8192 x i32], addrspace(5)
%scratch1 = alloca [8192 x i32], addrspace(5)
@@ -29,29 +29,25 @@
}
; Function Attrs: convergent nounwind
- declare { i1, i64 } @llvm.amdgcn.if(i1) #1
+ declare { i1, i64 } @llvm.amdgcn.if(i1) convergent nounwind
; Function Attrs: convergent nounwind
- declare { i1, i64 } @llvm.amdgcn.else(i64) #1
+ declare { i1, i64 } @llvm.amdgcn.else(i64) convergent nounwind
; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.break(i64) #2
+ declare i64 @llvm.amdgcn.break(i64) convergent nounwind readnone
; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.if.break(i1, i64) #2
+ declare i64 @llvm.amdgcn.if.break(i1, i64) convergent nounwind readnone
; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.else.break(i64, i64) #2
+ declare i64 @llvm.amdgcn.else.break(i64, i64) convergent nounwind readnone
; Function Attrs: convergent nounwind
- declare i1 @llvm.amdgcn.loop(i64) #1
+ declare i1 @llvm.amdgcn.loop(i64) convergent nounwind
; Function Attrs: convergent nounwind
- declare void @llvm.amdgcn.end.cf(i64) #1
-
- attributes #0 = { "target-cpu"="gfx803" }
- attributes #1 = { convergent nounwind }
- attributes #2 = { convergent nounwind readnone }
+ declare void @llvm.amdgcn.end.cf(i64) convergent nounwind
!0 = !{}
!1 = !{i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
index 9cc688dd0c5325..cb1095717e1f66 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
@@ -2,7 +2,7 @@
--- |
- define amdgpu_kernel void @multiple_mem_operands(ptr addrspace(1) %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
+ define amdgpu_kernel void @multiple_mem_operands(ptr addrspace(1) %out, i32 %cond, i32 %if_offset, i32 %else_offset) "target-cpu"="gfx803" {
entry:
%scratch0 = alloca [8192 x i32], addrspace(5)
%scratch1 = alloca [8192 x i32], addrspace(5)
@@ -29,10 +29,6 @@
ret void
}
- attributes #0 = { "target-cpu"="gfx803" }
- attributes #1 = { convergent nounwind }
- attributes #2 = { convergent nounwind readnone }
-
!0 = !{}
!1 = !{i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
index 5415293f82ffb7..f89b47b643fad3 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
@@ -30,13 +30,13 @@
@lds2 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4
@lds3 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4
- define void @asm_defines_address() #0 {
+ define void @asm_defines_address() convergent nounwind {
bb:
%tmp1 = load i32, i32 addrspace(3)* getelementptr inbounds ([256 x i32], [256 x i32] addrspace(3)* @lds0, i32 0, i32 0), align 4
%0 = and i32 %tmp1, 255
%tmp3 = load i32, i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 undef), align 4
%tmp6 = load i32, i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds3, i32 0, i32 undef), align 4
- %tmp7 = tail call i32 asm "v_or_b32 $0, 0, $1", "=v,v"(i32 %tmp6) #1
+ %tmp7 = tail call i32 asm "v_or_b32 $0, 0, $1", "=v,v"(i32 %tmp6) convergent nounwind readnone
%tmp10 = lshr i32 %tmp7, 16
%tmp11 = and i32 %tmp10, 255
%tmp12 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 %tmp11
@@ -56,15 +56,10 @@
ret void
}
- attributes #0 = { convergent nounwind }
- attributes #1 = { convergent nounwind readnone }
-
- define amdgpu_kernel void @move_waw_hazards() #0 {
+ define amdgpu_kernel void @move_waw_hazards() convergent nounwind {
ret void
}
- attributes #0 = { convergent nounwind }
-
define amdgpu_kernel void @merge_mmos(i32 addrspace(1)* %ptr_addr1) { ret void }
define amdgpu_kernel void @reorder_offsets(i32 addrspace(1)* %reorder_addr1) { ret void }
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
index bcd80a2a03b05a..ad451a472ccfc3 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
@@ -30,6 +30,4 @@ main_body:
ret void
}
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind willreturn writeonly }
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
index 88cc5334d5c734..aec8f62692ae80 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -5,7 +5,7 @@
; CHECK: ds_write_b32
; CHECK: ds_read_b32
; CHECK: ds_write_b32
-define amdgpu_vs void @test1(i32 %v) #0 {
+define amdgpu_vs void @test1(i32 %v) nounwind {
%p1 = getelementptr i32, ptr addrspace(3) null, i32 1
store i32 %v, ptr addrspace(3) null
@@ -17,7 +17,4 @@ define amdgpu_vs void @test1(i32 %v) #0 {
ret void
}
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind willreturn writeonly }
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index 79f15123f2b265..be989278724413 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -12,7 +12,7 @@
; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1
store i8 123, ptr addrspace(1) %out.gep.1
@@ -24,7 +24,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i8(ptr addrspace(1) %o
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i32 1
store i8 123, ptr addrspace(1) %out.gep.1
@@ -34,7 +34,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(ptr a
; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1
store i16 123, ptr addrspace(1) %out.gep.1
@@ -44,7 +44,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i16(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @merge_global_store_2_constants_0_i16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_0_i16(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1
store i16 0, ptr addrspace(1) %out.gep.1
@@ -56,7 +56,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_0_i16(ptr addrspace(1)
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i16, ptr addrspace(1) %out, i32 1
store i16 123, ptr addrspace(1) %out.gep.1
@@ -68,7 +68,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(ptr
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
store i32 123, ptr addrspace(1) %out.gep.1
@@ -78,7 +78,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i32(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
store float 1.0, ptr addrspace(1) %out.gep.1
store i32 456, ptr addrspace(1) %out
@@ -89,7 +89,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(ptr addrspace(
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
store i32 123, ptr addrspace(1) %out.gep.1
store float 4.0, ptr addrspace(1) %out
@@ -102,7 +102,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(ptr addrspace(
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3
@@ -116,7 +116,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_i32(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3
@@ -131,7 +131,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_f32_order(ptr addrspac
; First store is out of order.
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3
@@ -146,7 +146,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_f32(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
; GCN-AA: buffer_store_dwordx4 v
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3
@@ -165,7 +165,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(ptr addr
; CI-DAG: buffer_store_dwordx3
; GCN-NOT: buffer_store_dword
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
@@ -177,7 +177,7 @@ define amdgpu_kernel void @merge_global_store_3_constants_i32(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1
store i64 123, ptr addrspace(1) %out.gep.1
@@ -188,7 +188,7 @@ define amdgpu_kernel void @merge_global_store_2_constants_i64(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) %out) nounwind {
%out.gep.1 = getelementptr i64, ptr addrspace(1) %out, i64 1
%out.gep.2 = getelementptr i64, ptr addrspace(1) %out, i64 2
%out.gep.3 = getelementptr i64, ptr addrspace(1) %out, i64 3
@@ -203,7 +203,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_i64(ptr addrspace(1) %
; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[LOAD]]
-define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1
@@ -218,7 +218,7 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(ptr addrspace
; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 2
%in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 3
@@ -235,7 +235,7 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(
; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32:
; GCN: buffer_load_dwordx2 v
; GCN: buffer_store_dwordx2 v
-define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1
@@ -250,7 +250,7 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(ptr a
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3
@@ -279,7 +279,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(ptr addrspace
; SI-DAG: buffer_store_dword v
; CI-DAG: buffer_store_dwordx3
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 1
@@ -298,7 +298,7 @@ define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(ptr addrspace
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr float, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr float, ptr addrspace(1) %out, i32 3
@@ -321,7 +321,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(ptr addrspace
; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%in.gep.0 = getelementptr i32, ptr addrspace(1) %in, i32 11
%in.gep.1 = getelementptr i32, ptr addrspace(1) %in, i32 12
%in.gep.2 = getelementptr i32, ptr addrspace(1) %in, i32 13
@@ -347,7 +347,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: s_barrier
; GCN: buffer_store_dwordx4 [[LOAD]]
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3
@@ -361,7 +361,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr a
%w = load i32, ptr addrspace(1) %in.gep.3
; Make sure the barrier doesn't stop this
- tail call void @llvm.amdgcn.s.barrier() #1
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
store i32 %w, ptr addrspace(1) %out.gep.3
store i32 %z, ptr addrspace(1) %out.gep.2
@@ -378,7 +378,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(ptr a
; GCN: buffer_load_dwordx4 v
; GCN: s_barrier
; GCN: buffer_store_dwordx4 v
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3
@@ -392,7 +392,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr a
%w = load i32, ptr addrspace(1) %in.gep.3
; Make sure the barrier doesn't stop this
- tail call void @llvm.amdgcn.s.barrier() #1
+ tail call void @llvm.amdgcn.s.barrier() convergent nounwind
store i32 %w, ptr addrspace(1) %out
store i32 %z, ptr addrspace(1) %out.gep.1
@@ -406,7 +406,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(ptr a
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: buffer_store_dword [[LOAD]]
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1
%out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2
%out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3
@@ -436,7 +436,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(ptr addrspace(
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i8, ptr addrspace(1) %out, i8 1
%out.gep.2 = getelementptr i8, ptr addrspace(1) %out, i8 2
%out.gep.3 = getelementptr i8, ptr addrspace(1) %out, i8 3
@@ -460,7 +460,7 @@ define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[LOAD]]
; GCN: s_endpgm
-define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(1) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(1) %out, i32 3
@@ -481,7 +481,7 @@ define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(ptr addr
; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
; GCN: ds_write_b16
; GCN: s_endpgm
-define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %out) nounwind {
%out.gep.1 = getelementptr i8, ptr addrspace(3) %out, i32 1
store i8 123, ptr addrspace(3) %out.gep.1
@@ -493,7 +493,7 @@ define amdgpu_kernel void @merge_local_store_2_constants_i8(ptr addrspace(3) %ou
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
-define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1
store i32 123, ptr addrspace(3) %out.gep.1
@@ -511,7 +511,7 @@ define amdgpu_kernel void @merge_local_store_2_constants_i32(ptr addrspace(3) %o
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1
; GCN: s_endpgm
-define amdgpu_kernel void @merge_local_store_4_constants_i32(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @merge_local_store_4_constants_i32(ptr addrspace(3) %out) nounwind {
%out.gep.1 = getelementptr i32, ptr addrspace(3) %out, i32 1
%out.gep.2 = getelementptr i32, ptr addrspace(3) %out, i32 2
%out.gep.3 = getelementptr i32, ptr addrspace(3) %out, i32 3
@@ -621,7 +621,7 @@ define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) %
; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @copy_v3i32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @copy_v3i32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
%vec = load <3 x i32>, ptr addrspace(1) %in, align 4
store <3 x i32> %vec, ptr addrspace(1) %out
ret void
@@ -637,7 +637,7 @@ define amdgpu_kernel void @copy_v3i32_align4(ptr addrspace(1) noalias %out, ptr
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @copy_v3i64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @copy_v3i64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
%vec = load <3 x i64>, ptr addrspace(1) %in, align 4
store <3 x i64> %vec, ptr addrspace(1) %out
ret void
@@ -655,7 +655,7 @@ define amdgpu_kernel void @copy_v3i64_align4(ptr addrspace(1) noalias %out, ptr
; SI-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; CI-DAG: buffer_store_dwordx3 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @copy_v3f32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @copy_v3f32_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
%vec = load <3 x float>, ptr addrspace(1) %in, align 4
%fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
store <3 x float> %fadd, ptr addrspace(1) %out
@@ -672,14 +672,11 @@ define amdgpu_kernel void @copy_v3f32_align4(ptr addrspace(1) noalias %out, ptr
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @copy_v3f64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+define amdgpu_kernel void @copy_v3f64_align4(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
%vec = load <3 x double>, ptr addrspace(1) %in, align 4
%fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
store <3 x double> %fadd, ptr addrspace(1) %out
ret void
}
-declare void @llvm.amdgcn.s.barrier() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index ba34c1bbe1d710..b93576500a54e4 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -9,7 +9,7 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vgpr:
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vgpr(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vgpr(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -19,7 +19,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_agpr(ptr addrspace(1) %arg) #1 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_agpr(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -29,7 +29,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
%acc = call i32 asm sideeffect "; def $0", "={a0}"()
%in.1 = load <32 x float>, ptr addrspace(1) %arg
@@ -40,7 +40,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_phys_agpr:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
call void asm sideeffect "; use $0", "{a[100:131]}"(<32 x float> undef)
%in.1 = load <32 x float>, ptr addrspace(1) %arg
@@ -52,7 +52,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_no_agprs:
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_no_agprs(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_no_agprs(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
%acc = call i32 asm sideeffect "; def $0", "={v0}"()
%in.1 = load <32 x float>, ptr addrspace(1) %arg
@@ -63,7 +63,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
call void @foo()
%in.1 = load <32 x float>, ptr addrspace(1) %arg
@@ -78,7 +78,7 @@ bb:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call_multi_bb:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg, i1 %c0) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg, i1 %c0) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb1:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
@@ -96,7 +96,7 @@ bb3:
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
+define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -105,6 +105,3 @@ bb:
}
declare void @foo()
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index d5aeff7e819dd5..08a98a085ab41f 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -22,7 +22,7 @@
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_zeroinit(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
br label %for.cond.preheader
@@ -60,7 +60,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_unfoldable_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
br label %for.cond.preheader
@@ -94,7 +94,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_non_splat(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
br label %for.cond.preheader
@@ -228,7 +228,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_unfoldable_seq(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
br label %for.cond.preheader
@@ -261,7 +261,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_vgpr_init(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%init = bitcast i32 %tid to float
@@ -331,7 +331,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float %init) #0 {
+define amdgpu_kernel void @test_mfma_loop_sgpr_init(ptr addrspace(1) %arg, float %init) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp0 = insertelement <32 x float> undef, float %init, i32 0
%tmp1 = insertelement <32 x float> %tmp0, float %init, i32 1
@@ -433,7 +433,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, float %x) #0 {
+define amdgpu_kernel void @test_mfma_loop_mixed_init(ptr addrspace(1) %arg, float %x) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%init = bitcast i32 %tid to float
@@ -477,7 +477,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)
@@ -522,7 +522,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_loop_agpr_init(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_loop_agpr_init(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)
%init = extractelement <32 x float> %mai.0, i32 0
@@ -600,7 +600,7 @@ exit:
; GFX908-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}]
; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
entry:
br label %for.cond.preheader
@@ -629,5 +629,3 @@ exit:
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
index 8dbbab3c57f72f..5422cd0af14369 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
@@ -18,7 +18,7 @@ declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i3
; FAST: v_mfma_f32_32x32x1{{.*}} a[64:95], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
; FAST: v_mfma_f32_32x32x1{{.*}} a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
; GCN: v_mfma_f32_32x32x1{{.*}} a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <32 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -42,7 +42,7 @@ bb:
; FAST: v_mfma_f32_16x16x1{{.*}} a[32:47], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
; FAST: v_mfma_f32_16x16x1{{.*}} a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
; GCN: v_mfma_f32_16x16x1{{.*}} a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
-define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 0, i32 0, i32 0)
@@ -61,7 +61,7 @@ bb:
; FAST: v_mfma_f32_4x4x1{{.*}} a[8:11], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
; FAST: v_mfma_f32_4x4x1{{.*}} a[4:7], v{{[0-9]+}}, v{{[0-9]+}}, a[8:11]
; GCN: v_mfma_f32_4x4x1{{.*}} a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
-define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) "amdgpu-flat-work-group-size"="1,256" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 0, i32 0, i32 0)
@@ -71,5 +71,3 @@ bb:
store <4 x float> %mai.3, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/min-waves-per-eu-not-respected.ll b/llvm/test/CodeGen/AMDGPU/min-waves-per-eu-not-respected.ll
index 239fa80ade98a9..d3bc9f7fbbcf1f 100644
--- a/llvm/test/CodeGen/AMDGPU/min-waves-per-eu-not-respected.ll
+++ b/llvm/test/CodeGen/AMDGPU/min-waves-per-eu-not-respected.ll
@@ -2,15 +2,12 @@
; 1024 flat work group size across 2560 possible threads -> occupancy should be 8 max.
; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'occupancy_8_target_9': desired occupancy was 9, final occupancy is 8
-define amdgpu_kernel void @occupancy_8_target_9() #0 {
+define amdgpu_kernel void @occupancy_8_target_9() "amdgpu-flat-work-group-size"="1,1024" "amdgpu-waves-per-eu"="9" {
ret void
}
; Impossible occupancy target
; WARN: warning: <unknown>:0:0: failed to meet occupancy target given by 'amdgpu-waves-per-eu' in 'impossible_occupancy': desired occupancy was 11, final occupancy is 10
-define amdgpu_kernel void @impossible_occupancy() #1 {
+define amdgpu_kernel void @impossible_occupancy() "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="11" {
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" "amdgpu-waves-per-eu"="9" }
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="11" }
diff --git a/llvm/test/CodeGen/AMDGPU/min3.ll b/llvm/test/CodeGen/AMDGPU/min3.ll
index 0e25540f5dd2ea..d4c6b0ce9f3fba 100644
--- a/llvm/test/CodeGen/AMDGPU/min3.ll
+++ b/llvm/test/CodeGen/AMDGPU/min3.ll
@@ -4,7 +4,7 @@
; GCN-LABEL: {{^}}v_test_imin3_slt_i32:
; GCN: v_min3_i32
-define amdgpu_kernel void @v_test_imin3_slt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
@@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umin3_ult_i32:
; GCN: v_min3_u32
-define amdgpu_kernel void @v_test_umin3_ult_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
@@ -43,7 +43,7 @@ define amdgpu_kernel void @v_test_umin3_ult_i32(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umin_umin_umin:
; GCN: v_min_i32
; GCN: v_min3_i32
-define amdgpu_kernel void @v_test_umin_umin_umin(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin_umin_umin(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
@@ -77,7 +77,7 @@ define amdgpu_kernel void @v_test_umin_umin_umin(ptr addrspace(1) %out, ptr addr
; GCN-LABEL: {{^}}v_test_umin3_2_uses:
; GCN-NOT: v_min3
-define amdgpu_kernel void @v_test_umin3_2_uses(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_2_uses(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid2 = mul i32 %tid, 2
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
@@ -117,7 +117,7 @@ define amdgpu_kernel void @v_test_umin3_2_uses(ptr addrspace(1) %out, ptr addrsp
; VI: v_min_i16
; GFX9: v_min3_i16
-define amdgpu_kernel void @v_test_imin3_slt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid
@@ -141,7 +141,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_min_u16
; GFX9: v_min3_u16
-define amdgpu_kernel void @v_test_umin3_ult_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i16, ptr addrspace(1) %bptr, i32 %tid
@@ -165,7 +165,7 @@ define amdgpu_kernel void @v_test_umin3_ult_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_min_i16
; GFX9: v_min3_i16
-define amdgpu_kernel void @v_test_imin3_slt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i8, ptr addrspace(1) %bptr, i32 %tid
@@ -189,7 +189,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_min_u16
; GFX9: v_min3_u16
-define amdgpu_kernel void @v_test_umin3_ult_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i8, ptr addrspace(1) %bptr, i32 %tid
@@ -213,7 +213,7 @@ define amdgpu_kernel void @v_test_umin3_ult_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_min_i16
; GFX9: v_min3_i16
-define amdgpu_kernel void @v_test_imin3_slt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i7, ptr addrspace(1) %bptr, i32 %tid
@@ -237,7 +237,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i7(ptr addrspace(1) %out, ptr addrsp
; VI: v_min_u16
; GFX9: v_min3_u16
-define amdgpu_kernel void @v_test_umin3_ult_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i7, ptr addrspace(1) %bptr, i32 %tid
@@ -256,7 +256,7 @@ define amdgpu_kernel void @v_test_umin3_ult_i7(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}v_test_imin3_slt_i33:
; GCN-NOT: v_min3
-define amdgpu_kernel void @v_test_imin3_slt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i33, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i33, ptr addrspace(1) %bptr, i32 %tid
@@ -275,7 +275,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i33(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umin3_ult_i33:
; GCN-NOT: v_min3
-define amdgpu_kernel void @v_test_umin3_ult_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i33(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i33, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i33, ptr addrspace(1) %bptr, i32 %tid
@@ -294,7 +294,7 @@ define amdgpu_kernel void @v_test_umin3_ult_i33(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_imin3_slt_i64:
; GCN-NOT: v_min3
-define amdgpu_kernel void @v_test_imin3_slt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_imin3_slt_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
@@ -313,7 +313,7 @@ define amdgpu_kernel void @v_test_imin3_slt_i64(ptr addrspace(1) %out, ptr addrs
; GCN-LABEL: {{^}}v_test_umin3_ult_i64:
; GCN-NOT: v_min3
-define amdgpu_kernel void @v_test_umin3_ult_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+define amdgpu_kernel void @v_test_umin3_ult_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%gep1 = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
@@ -330,7 +330,4 @@ define amdgpu_kernel void @v_test_umin3_ult_i64(ptr addrspace(1) %out, ptr addrs
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index b5b9997f297c20..bafc3bf0112d2e 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -270,7 +270,7 @@ define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b,
ret float %maxmin
}
-define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
+define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; GFX11-LABEL: test_med3_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -379,7 +379,7 @@ define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
ret half %maxmin
}
-define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
+define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" {
; GFX11-LABEL: test_med3_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -402,5 +402,4 @@ declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)
-attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll
index b97b852363393f..e11a7fbfb48f09 100644
--- a/llvm/test/CodeGen/AMDGPU/missing-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll
@@ -14,7 +14,7 @@
; SI-DAG: buffer_store_dword
; SI-DAG: buffer_store_dword
; SI: s_endpgm
-define amdgpu_kernel void @missing_store_reduced(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 {
+define amdgpu_kernel void @missing_store_reduced(ptr addrspace(1) %out, ptr addrspace(1) %gptr) nounwind {
%ptr0 = load ptr addrspace(4), ptr addrspace(3) @ptr_load, align 8
%ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 2
@@ -25,5 +25,3 @@ define amdgpu_kernel void @missing_store_reduced(ptr addrspace(1) %out, ptr addr
ret void
}
-attributes #0 = { nounwind }
-
diff --git a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
index fe4c2e4b488b89..53bc78a29f91f8 100644
--- a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
+++ b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
@@ -2,7 +2,7 @@
; GCN-LABEL: _amdgpu_hs_main:
-define amdgpu_hs void @_amdgpu_hs_main() #0 {
+define amdgpu_hs void @_amdgpu_hs_main() "amdgpu-max-work-group-size"="128" "target-features"=",+wavefrontsize32" {
.entry:
ret void
}
@@ -10,9 +10,9 @@ define amdgpu_hs void @_amdgpu_hs_main() #0 {
; GCN-LABEL: _amdgpu_ps_main:
; GCN: s_and_saveexec_b64
-define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr #1 {
+define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr "target-features"=",+wavefrontsize64" {
.entry:
- %tmp = tail call float @llvm.amdgcn.interp.p2(float undef, float undef, i32 1, i32 0, i32 %arg) #2
+ %tmp = tail call float @llvm.amdgcn.interp.p2(float undef, float undef, i32 1, i32 0, i32 %arg) nounwind readnone speculatable
%tmp1 = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float %tmp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
%tmp2 = fcmp olt float %tmp1, 5.000000e-01
br i1 %tmp2, label %bb, label %l
@@ -26,16 +26,10 @@ l: ; preds = %.entry
; GCN-LABEL: _amdgpu_gs_main:
-define amdgpu_gs void @_amdgpu_gs_main() #4 {
+define amdgpu_gs void @_amdgpu_gs_main() "target-features"=",+wavefrontsize32" {
.entry:
ret void
}
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-
-attributes #0 = { "amdgpu-max-work-group-size"="128" "target-features"=",+wavefrontsize32" }
-attributes #1 = { "target-features"=",+wavefrontsize64" }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { nounwind readonly }
-attributes #4 = { "target-features"=",+wavefrontsize32" }
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone speculatable
+declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
index 5977566e2d00ba..720ed9d5c72f49 100644
--- a/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
+++ b/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
@@ -30,7 +30,7 @@
;
; GCN: .amd_amdgpu_pal_metadata{{.*}},0x2e00,0x8000,{{.*}}0xa1b6,0x1,{{.*}},0xa2d5,0xe00000,
-define dllexport amdgpu_ps void @_amdgpu_ps_main(float %arg10) #0 {
+define dllexport amdgpu_ps void @_amdgpu_ps_main(float %arg10) nounwind "InitialPSInputAddr"="2" "target-features"="+wavefrontsize64" {
.entry:
%tmp100 = fcmp ogt float %arg10, 0.25
br i1 %tmp100, label %if, label %endif
@@ -43,7 +43,7 @@ endif:
ret void
}
-define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr #2 {
+define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr nounwind "target-features"="+wavefrontsize32" {
.entry:
%tmp100 = fcmp ogt float %arg10, 0.25
br i1 %tmp100, label %if, label %endif
@@ -56,7 +56,7 @@ endif:
ret void
}
-define dllexport amdgpu_gs void @_amdgpu_gs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr #2 {
+define dllexport amdgpu_gs void @_amdgpu_gs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr nounwind "target-features"="+wavefrontsize32" {
.entry:
%tmp100 = fcmp ogt float %arg10, 0.25
br i1 %tmp100, label %if, label %endif
@@ -69,7 +69,7 @@ endif:
ret void
}
-define dllexport amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr #2 {
+define dllexport amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr nounwind "target-features"="+wavefrontsize32" {
.entry:
%tmp100 = fcmp ogt float %arg10, 0.25
br i1 %tmp100, label %if, label %endif
@@ -82,7 +82,7 @@ endif:
ret void
}
-define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr #2 {
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, float %arg10) local_unnamed_addr nounwind "target-features"="+wavefrontsize32" {
.entry:
%tmp100 = fcmp ogt float %arg10, 0.25
br i1 %tmp100, label %if, label %endif
@@ -95,12 +95,7 @@ endif:
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2
-
-attributes #0 = { nounwind "InitialPSInputAddr"="2" "target-features"="+wavefrontsize64" }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #3 = { nounwind readonly }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind "target-features"="+wavefrontsize32"
!amdgpu.pal.metadata = !{!8}
diff --git a/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll b/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
index b38388d748f215..578a929c64e6cd 100644
--- a/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
@@ -18,7 +18,7 @@
; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]]
; GCN: buffer_load_ubyte v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]],
-define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], ptr addrspace(1) %ptrarg, i32 %arg3) #0 {
+define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], ptr addrspace(1) %ptrarg, i32 %arg3) nounwind {
bb:
%tmp = icmp sgt i32 %arg3, 0
br i1 %tmp, label %bb4, label %bb17
@@ -32,5 +32,3 @@ bb4:
bb17:
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
index 4332d9daeaaf5e..58c99e1f5a0c21 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
@@ -8,9 +8,9 @@
; Check that moving the pointer out of the resource descriptor to
; vaddr works for atomics.
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) nounwind {
; GCN-LABEL: atomic_max_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -70,7 +70,7 @@ exit:
ret void
}
-define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) nounwind {
; GCN-LABEL: atomic_max_i32_noret:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
@@ -122,6 +122,3 @@ atomic:
exit:
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
index 63688ebeab9d0b..7d473bbab48e52 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
@@ -8,9 +8,9 @@
; Check that moving the pointer out of the resource descriptor to
; vaddr works for atomics.
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) nounwind {
; GCN-LABEL: atomic_max_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -55,7 +55,7 @@ exit:
ret void
}
-define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
+define amdgpu_kernel void @atomic_max_i32_noret(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) nounwind {
; GCN-LABEL: atomic_max_i32_noret:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
@@ -93,6 +93,3 @@ atomic:
exit:
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
index 9b7d851a0c74e3..cc950dbc1636ce 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-worklist.ll
@@ -11,7 +11,7 @@
; GCN-NEXT: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @in_worklist_once() #0 {
+define amdgpu_kernel void @in_worklist_once() nounwind {
bb:
%tmp = load i64, ptr addrspace(5) undef
br label %bb1
@@ -25,5 +25,3 @@ bb1: ; preds = %bb1, %bb
%tmp16 = xor i64 %tmp15, %tmp3
br label %bb1
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/movreld-bug.ll b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll
index e2deac2167420d..d9578bc9e0871d 100644
--- a/llvm/test/CodeGen/AMDGPU/movreld-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/movreld-bug.ll
@@ -13,11 +13,9 @@
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return
-define amdgpu_ps float @main(i32 inreg %arg) #0 {
+define amdgpu_ps float @main(i32 inreg %arg) "InitialPSInputAddr"="36983" {
main_body:
%tmp24 = insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %arg
%tmp25 = extractelement <16 x float> %tmp24, i32 1
ret float %tmp25
}
-
-attributes #0 = { "InitialPSInputAddr"="36983" }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index b5ee6689f8dc39..b3830d40b97dbb 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -9,7 +9,7 @@
; Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions.
; Uses the old forms of the buffer intrinsics that don't take pointer arguments.
-define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
+define float @mubuf_vgpr(<4 x i32> %i, i32 %c) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr:
; GFX9_W64: ; %bb.0:
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -247,7 +247,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
- %call = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
+ %call = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
ret float %call
}
@@ -255,7 +255,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; FIXME: redundant s_mov
-define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
+define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX9_W64: ; %bb.0: ; %entry
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -716,8 +716,8 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
entry:
- %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
- %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) #1
+ %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
+ %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
store volatile float %val0, ptr addrspace(1) %out0
store volatile float %val1, ptr addrspace(1) %out1
ret void
@@ -726,7 +726,7 @@ entry:
; Confirm spills do not occur between the XOR and branch that terminate the
; waterfall loop BBs.
-define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX9_W64: ; %bb.0: ; %entry
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1271,13 +1271,13 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
; W64-O0-NEXT: s_setpc_b64 s[30:31]
entry:
%live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
- %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) nounwind readonly
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
%cmp = icmp eq i32 %idx, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
- %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
+ %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i32 0, i32 0) nounwind readonly
br label %bb2
bb2:
@@ -1286,8 +1286,5 @@ bb2:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 162c47f879465c..db62ae8704a282 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -8,7 +8,7 @@
; Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions.
-define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
+define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr:
; GFX9_W64: ; %bb.0:
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -261,7 +261,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
- %call = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %c, i32 0, i32 0, i32 0) #1
+ %call = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
ret float %call
}
@@ -269,7 +269,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
; FIXME: redundant s_mov
-define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) %j, i32 %c, ptr addrspace(1) %out0, ptr addrspace(1) %out1) #0 {
+define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) %j, i32 %c, ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr_adjacent_in_block:
; GFX9_W64: ; %bb.0: ; %entry
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -755,8 +755,8 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
entry:
- %val0 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %c, i32 0, i32 0, i32 0) #1
- %val1 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %j, i32 %c, i32 0, i32 0, i32 0) #1
+ %val0 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
+ %val1 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %j, i32 %c, i32 0, i32 0, i32 0) nounwind readonly
store volatile float %val0, ptr addrspace(1) %out0
store volatile float %val1, ptr addrspace(1) %out1
ret void
@@ -765,7 +765,7 @@ entry:
; Confirm spills do not occur between the XOR and branch that terminate the
; waterfall loop BBs.
-define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, i32 %c, ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, i32 %c, ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; GFX9_W64-LABEL: mubuf_vgpr_outside_entry:
; GFX9_W64: ; %bb.0: ; %entry
; GFX9_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1348,13 +1348,13 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
; W64-O0-NEXT: s_setpc_b64 s[30:31]
entry:
%live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
- %val0 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %val0 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %i, i32 %live.out.reg, i32 0, i32 0, i32 0) nounwind readonly
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
%cmp = icmp eq i32 %idx, 0
br i1 %cmp, label %bb1, label %bb2
bb1:
- %val1 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %j, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
+ %val1 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %j, i32 %live.out.reg, i32 0, i32 0, i32 0) nounwind readonly
br label %bb2
bb2:
@@ -1363,8 +1363,5 @@ bb2:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
+declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
index 4ba5f3abcb24b1..35080d7fa71664 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-offset-private.ll
@@ -6,49 +6,49 @@
; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @store_private_offset_i8() #0 {
+define amdgpu_kernel void @store_private_offset_i8() nounwind {
store volatile i8 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @store_private_offset_i16() #0 {
+define amdgpu_kernel void @store_private_offset_i16() nounwind {
store volatile i16 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @store_private_offset_i32() #0 {
+define amdgpu_kernel void @store_private_offset_i32() nounwind {
store volatile i32 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @store_private_offset_v2i32() #0 {
+define amdgpu_kernel void @store_private_offset_v2i32() nounwind {
store volatile <2 x i32> <i32 5, i32 10>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @store_private_offset_v4i32() #0 {
+define amdgpu_kernel void @store_private_offset_v4i32() nounwind {
store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @load_private_offset_i8() #0 {
+define amdgpu_kernel void @load_private_offset_i8() nounwind {
%load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) nounwind {
%load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
%sextload = sext i8 %load to i32
store i32 %sextload, ptr addrspace(1) undef
@@ -57,7 +57,7 @@ define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) nounwind {
%load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
%zextload = zext i8 %load to i32
store i32 %zextload, ptr addrspace(1) undef
@@ -66,14 +66,14 @@ define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @load_private_offset_i16() #0 {
+define amdgpu_kernel void @load_private_offset_i16() nounwind {
%load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) nounwind {
%load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
%sextload = sext i16 %load to i32
store i32 %sextload, ptr addrspace(1) undef
@@ -82,7 +82,7 @@ define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) nounwind {
%load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
%zextload = zext i16 %load to i32
store i32 %zextload, ptr addrspace(1) undef
@@ -91,28 +91,28 @@ define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) #0
; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @load_private_offset_i32() #0 {
+define amdgpu_kernel void @load_private_offset_i32() nounwind {
%load = load volatile i32, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @load_private_offset_v2i32() #0 {
+define amdgpu_kernel void @load_private_offset_v2i32() nounwind {
%load = load volatile <2 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], 0 offset:8
-define amdgpu_kernel void @load_private_offset_v4i32() #0 {
+define amdgpu_kernel void @load_private_offset_v4i32() nounwind {
%load = load volatile <4 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
ret void
}
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], 0 offset:4095
-define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
+define amdgpu_kernel void @store_private_offset_i8_max_offset() nounwind {
store volatile i8 5, ptr addrspace(5) inttoptr (i32 4095 to ptr addrspace(5))
ret void
}
@@ -120,7 +120,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], 0 offen{{$}}
-define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
+define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() nounwind {
store volatile i8 5, ptr addrspace(5) inttoptr (i32 4096 to ptr addrspace(5))
ret void
}
@@ -128,7 +128,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], 0 offen offset:1{{$}}
-define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
+define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() nounwind {
store volatile i8 5, ptr addrspace(5) inttoptr (i32 4097 to ptr addrspace(5))
ret void
}
@@ -143,7 +143,7 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 0,
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen offset:32
-define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
+define amdgpu_kernel void @store_private_unknown_bits_vaddr() nounwind {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%vaddr = load volatile i32, ptr addrspace(1) undef
%vaddr.off = add i32 %vaddr, 8
@@ -151,5 +151,3 @@ define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
store volatile i32 9, ptr addrspace(5) %gep
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
index c0c93f7badde25..2bf2ca447b5375 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
@@ -46,8 +46,5 @@ main_body:
ret float %tmp7
}
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
index 202ce009ef69a5..d4641d2b4d2454 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
@@ -43,8 +43,5 @@ main_body:
ret float %tmp7
}
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind readonly }
+declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index dd9f5fa7fcb15d..13935c01c96962 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -173,12 +173,6 @@ define amdgpu_kernel void @store_vgpr_ptr(ptr addrspace(1) %out) {
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #2
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32 immarg) #3
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind willreturn writeonly }
-attributes #3 = { nounwind readonly willreturn }
-attributes #4 = { readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
+declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
+declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 1e9994dd8e6efd..1254e5d10a186a 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -6,7 +6,7 @@
; should be introduced before LSR or not. It seems to help in some
; cases, and hurt others.
-define void @lsr_order_mul24_0(i32 %arg, i32 %arg2, i32 %arg6, i32 %arg13, i32 %arg16) #0 {
+define void @lsr_order_mul24_0(i32 %arg, i32 %arg2, i32 %arg6, i32 %arg13, i32 %arg16) nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX9-LABEL: lsr_order_mul24_0:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -50,7 +50,7 @@ bb23: ; preds = %bb23, %bb
br i1 %tmp37, label %bb23, label %.loopexit
}
-define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(3) nocapture %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(1) nocapture readonly %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, i32 %arg16, i1 zeroext %arg17, i1 zeroext %arg18) #0 {
+define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(3) nocapture %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(1) nocapture readonly %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, i32 %arg16, i1 zeroext %arg17, i1 zeroext %arg18) nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX9-LABEL: lsr_order_mul24_1:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,7 +120,7 @@ bb19: ; preds = %bb
bb23: ; preds = %bb19, %bb23
%tmp24 = phi i32 [ %arg, %bb19 ], [ %tmp47, %bb23 ]
%tmp25 = uitofp i32 %tmp24 to float
- %tmp26 = tail call float @llvm.fmuladd.f32(float %tmp25, float %tmp21, float 0x3EE4F8B580000000) #2
+ %tmp26 = tail call float @llvm.fmuladd.f32(float %tmp25, float %tmp21, float 0x3EE4F8B580000000) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign"
%tmp27 = fptoui float %tmp26 to i32
%tmp28 = and i32 %tmp27, 16777215
%tmp29 = mul i32 %tmp28, %tmp22
@@ -147,7 +147,7 @@ bb23: ; preds = %bb19, %bb23
br i1 %tmp48, label %bb23, label %.loopexit
}
-define void @slsr1_0(i32 %b.arg, i32 %s.arg) #0 {
+define void @slsr1_0(i32 %b.arg, i32 %s.arg) nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX9-LABEL: slsr1_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -184,7 +184,7 @@ define void @slsr1_0(i32 %b.arg, i32 %s.arg) #0 {
ret void
}
-define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
+define void @slsr1_1(i32 %b.arg, i32 %s.arg) nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX9-LABEL: slsr1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -260,11 +260,7 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
ret void
}
-declare void @foo(i32) #2
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
-attributes #0 = { nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+declare void @foo(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign"
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
!0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 357b851a8f56f1..c37590f0d87f0c 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -6,7 +6,7 @@
; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-define amdgpu_kernel void @test_smul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: test_smul24_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -97,7 +97,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_smulhi24_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smulhi24_i64(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: test_smulhi24_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -271,7 +271,7 @@ define <2 x i64> @test_smul48_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; unnecessary extension instructions because after legalization they
; will not be removed by SimplifyDemandedBits because there are
; multiple uses by the separate mul and mulhi.
-define amdgpu_kernel void @test_smul24_i64(ptr addrspace(1) %out, [8 x i32], i32 %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i64(ptr addrspace(1) %out, [8 x i32], i32 %a, [8 x i32], i32 %b) nounwind {
; SI-LABEL: test_smul24_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -373,7 +373,7 @@ define amdgpu_kernel void @test_smul24_i64(ptr addrspace(1) %out, [8 x i32], i32
ret void
}
-define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
; SI-LABEL: test_smul24_i64_square:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -460,7 +460,7 @@ define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a,
ret void
}
-define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) #0 {
+define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) nounwind {
; SI-LABEL: test_smul24_i33:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -813,4 +813,3 @@ bb7:
ret void
}
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 4eefff504f19ee..2aa6162694e429 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -103,9 +103,9 @@
; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) #0 {
+define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -158,9 +158,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; GCN-LABEL: {{^}}multi_divergent_region_exit_unreachable_unreachable:
; GCN: ; %UnifiedUnreachableBlock
; GCN-NEXT: .Lfunc_end
-define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) #0 {
+define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -239,9 +239,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %11)
; IR: ret void
-define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2, i32 %arg3) #0 {
+define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2, i32 %arg3) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -288,9 +288,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %16)
; IR: %9 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %8)
-define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2, i32 %arg3) #0 {
+define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2, i32 %arg3) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -334,7 +334,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf.i64(i64 %12)
; IR: ret float %UnifiedRetVal
-define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
+define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) nounwind {
entry:
%Pivot = icmp slt i32 %vgpr, 2
br i1 %Pivot, label %LeafBlock, label %LeafBlock1
@@ -376,7 +376,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return
-define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
+define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) nounwind {
entry:
%uniform.cond = icmp slt i32 %sgpr, 2
br i1 %uniform.cond, label %LeafBlock, label %LeafBlock1
@@ -433,9 +433,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %11)
; IR-NEXT: ret void
-define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) #0 {
+define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -489,9 +489,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %11)
; IR-NEXT: ret void
-define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) #0 {
+define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -531,9 +531,9 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @multi_divergent_region_exit_ret_switch(
-define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) #0 {
+define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(ptr addrspace(1) nocapture %arg0, ptr addrspace(1) nocapture %arg1, ptr addrspace(1) nocapture %arg2) nounwind {
entry:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = add i32 0, %tmp
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 0, %tmp2
@@ -570,7 +570,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
}
; IR-LABEL: @divergent_multi_ret_nest_in_uniform_triangle(
-define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
+define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) nounwind {
entry:
%uniform.cond0 = icmp eq i32 %arg0, 4
br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret
@@ -594,7 +594,7 @@ uniform.ret:
}
; IR-LABEL: @divergent_complex_multi_ret_nest_in_uniform_triangle(
-define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
+define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) nounwind {
entry:
%uniform.cond0 = icmp eq i32 %arg0, 4
br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret
@@ -644,7 +644,7 @@ uniform.ret:
; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %5)
; IR-NEXT: ret void
-define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
+define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) nounwind {
entry:
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%divergent.cond0 = icmp eq i32 %id.x, 0
@@ -690,7 +690,7 @@ divergent.ret:
; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
; IR-NEXT: ret void
-define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
+define amdgpu_kernel void @multi_divergent_unreachable_exit() nounwind {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
switch i32 %tmp, label %bb3 [
@@ -747,8 +747,5 @@ bb27: ; preds = %.entry
ret void
}
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 16de2c0c6de08c..f8c9107a38d7a9 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -108,7 +108,7 @@ ENDIF: ; preds = %LOOP
br i1 %tmp51, label %LOOP, label %LOOP.outer
}
-define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @multi_if_break_loop(i32 %arg) nounwind {
; OPT-LABEL: @multi_if_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
@@ -246,7 +246,4 @@ bb9:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
index cbdc7bb456346f..68afa17c669e42 100644
--- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -verify-machineinstrs < %s | FileCheck %s
; FP is in CSR range, modified.
-define hidden fastcc void @callee_has_fp() #1 {
+define hidden fastcc void @callee_has_fp() "frame-pointer"="all" noinline {
; CHECK-LABEL: callee_has_fp:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -23,7 +23,7 @@ define hidden fastcc void @callee_has_fp() #1 {
; Has no stack objects, but introduces them due to the CSR spill. We
; see the FP modified in the callee with IPRA. We should not have
; redundant spills of s33 or assert.
-define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
+define internal fastcc void @csr_vgpr_spill_fp_callee() "frame-pointer"="none" noinline {
; CHECK-LABEL: csr_vgpr_spill_fp_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -97,7 +97,7 @@ bb:
}
; Same, except with a tail call.
-define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 {
+define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() "frame-pointer"="none" noinline {
; CHECK-LABEL: csr_vgpr_spill_fp_tailcall_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -156,7 +156,7 @@ bb:
ret void
}
-define hidden i32 @tail_call() #1 {
+define hidden i32 @tail_call() "frame-pointer"="all" noinline {
; CHECK-LABEL: tail_call:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -169,7 +169,7 @@ entry:
ret i32 0
}
-define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
+define hidden i32 @caller_save_vgpr_spill_fp_tail_call() "frame-pointer"="none" noinline {
; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -204,7 +204,7 @@ entry:
ret i32 %call
}
-define hidden i32 @caller_save_vgpr_spill_fp() #0 {
+define hidden i32 @caller_save_vgpr_spill_fp() "frame-pointer"="none" noinline {
; CHECK-LABEL: caller_save_vgpr_spill_fp:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -271,8 +271,5 @@ entry:
ret void
}
-attributes #0 = { "frame-pointer"="none" noinline }
-attributes #1 = { "frame-pointer"="all" noinline }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 64a94a5ee0e709..77ae6853f492f7 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -5,7 +5,7 @@
; Test calls when called by other callable functions rather than
; kernels.
-declare void @external_void_func_i32(i32) #0
+declare void @external_void_func_i32(i32) nounwind
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
; GCN: s_waitcnt
@@ -33,7 +33,7 @@ declare void @external_void_func_i32(i32) #0
; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
-define void @test_func_call_external_void_func_i32_imm() #0 {
+define void @test_func_call_external_void_func_i32_imm() nounwind {
call void @external_void_func_i32(i32 42)
ret void
}
@@ -46,7 +46,7 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN: s_swappc_b64
; GCN: s_addk_i32 s32, 0xec00{{$}}
; GCN: s_setpc_b64
-define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
+define void @test_func_call_external_void_func_i32_imm_stack_use() nounwind {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep15 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 16
store volatile i32 0, ptr addrspace(5) %alloca
@@ -54,7 +54,3 @@ define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
call void @external_void_func_i32(i32 42)
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index f6e3509eb029b1..11704b33127fc8 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -7,7 +7,7 @@
; the condition that appears to have no uses until the loop is
; completely processed.
-define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocapture %arg) #0 {
+define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocapture %arg) nounwind {
; GCN-LABEL: reduced_nested_loop_conditions:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
@@ -95,7 +95,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(ptr addrspace(3) nocap
; IR-NEXT: ret void
;
bb:
- %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%my.tmp1 = getelementptr inbounds i64, ptr addrspace(3) %arg, i32 %my.tmp
%my.tmp2 = load volatile i64, ptr addrspace(3) %my.tmp1
br label %bb5
@@ -144,7 +144,7 @@ bb23: ; preds = %bb10
; Earlier version of above, before a run of the structurizer.
-define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %arg) #0 {
+define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) nocapture %arg) nounwind {
; GCN-LABEL: nested_loop_conditions:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -284,7 +284,7 @@ bb:
br i1 %my.tmp1235, label %bb14.lr.ph, label %bb13
bb14.lr.ph: ; preds = %bb
- %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%my.tmp1 = zext i32 %my.tmp to i64
%my.tmp2 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %my.tmp1
%my.tmp3 = load i64, ptr addrspace(1) %my.tmp2, align 16
@@ -339,7 +339,4 @@ bb31: ; preds = %bb31.loopexit, %bb1
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index d62f045674acec..55191f467bd55c 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
@@ -119,10 +119,7 @@ loop0_merge: ; preds = %branch2_merge, %bb
}
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fma.f32(float, float, float) #0
+declare float @llvm.fma.f32(float, float, float) nocallback nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nounwind readonly willreturn
-declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
-attributes #1 = { nounwind readonly willreturn }
+declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll
index 34e67d0993fb7a..86af4b8ea7c0a5 100644
--- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll
@@ -4,7 +4,7 @@
; Test that source locations (.loc directives) are not added to the code within the prologue.
; Function Attrs: convergent mustprogress nounwind
-define hidden void @_ZL3barv() #0 !dbg !1644 {
+define hidden void @_ZL3barv() nounwind "frame-pointer"="all" !dbg !1644 {
; CHECK-LABEL: _ZL3barv:
; CHECK: .Lfunc_begin0:
; CHECK-NEXT: .file 0 "/tmp" "lane-info.cpp" md5 0x4ab9b75a30baffdf0f6f536a80e3e382
@@ -55,9 +55,7 @@ entry:
}
; Function Attrs: convergent nounwind
-declare void @_ZL13sleep_foreverv() #0
-
-attributes #0 = { nounwind "frame-pointer"="all" }
+declare void @_ZL13sleep_foreverv() nounwind "frame-pointer"="all"
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!1638, !1639, !1640, !1641}
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 125e6bc0f787f1..c1c30e52a72827 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -15,7 +15,7 @@
; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an
; alignment less than the stack alignment.
-define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) #1 {
+define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" {
; MUBUF-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_add_u32 s0, s0, s9
@@ -399,10 +399,7 @@ bb.1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
index 6df702d61602ad..55397ccf535359 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
@@ -23,7 +23,7 @@ main_body:
; GCN-LABEL: {{^}}sample_contig_nsa_10vgprs:
; GCN-DAG: image_sample_c_l v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
-define amdgpu_ps <2 x float> @sample_contig_nsa_10vgprs(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) #0 {
+define amdgpu_ps <2 x float> @sample_contig_nsa_10vgprs(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) "amdgpu-num-vgpr"="10" {
main_body:
%zcompare.1 = fadd float %zcompare, 1.0
%s1.1 = fadd float %s1, 1.0
@@ -98,5 +98,3 @@ main_body:
declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-
-attributes #0 = {"amdgpu-num-vgpr"="10"}
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
index 5b1723a051c5fd..1910818ef187aa 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
@@ -1,10 +1,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-nsa-reassign,virtregrewriter,si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
--- |
- define amdgpu_kernel void @nsa_reassign() #0 { ret void }
- define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
-
- attributes #0 = { "amdgpu-num-vgpr"="8" }
+ define amdgpu_kernel void @nsa_reassign() "amdgpu-num-vgpr"="8" { ret void }
+ define amdgpu_kernel void @do_not_reassign_spill() "amdgpu-num-vgpr"="8" { ret void }
...
# GCN-LABEL: name: nsa_reassign
diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
index d87eb9711488c3..02b1ce1b9c4197 100644
--- a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
+++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -19,7 +19,7 @@
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @max_occupancy() #10 {
+define amdgpu_kernel void @max_occupancy() "amdgpu-flat-work-group-size"="1,32" {
ret void
}
@@ -29,7 +29,7 @@ define amdgpu_kernel void @max_occupancy() #10 {
; GFX10W32: ; Occupancy: 4
; GFX1100W64: ; Occupancy: 3
; GFX1100W32: ; Occupancy: 5
-define amdgpu_kernel void @limited_occupancy_3() #0 {
+define amdgpu_kernel void @limited_occupancy_3() "amdgpu-waves-per-eu"="2,3" "amdgpu-flat-work-group-size"="1,64" {
ret void
}
@@ -38,7 +38,7 @@ define amdgpu_kernel void @limited_occupancy_3() #0 {
; GFX1010: ; Occupancy: 18
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @limited_occupancy_18() #1 {
+define amdgpu_kernel void @limited_occupancy_18() "amdgpu-waves-per-eu"="18,18" "amdgpu-flat-work-group-size"="1,32" {
ret void
}
@@ -47,7 +47,7 @@ define amdgpu_kernel void @limited_occupancy_18() #1 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @limited_occupancy_19() #2 {
+define amdgpu_kernel void @limited_occupancy_19() "amdgpu-waves-per-eu"="19,19" "amdgpu-flat-work-group-size"="1,32" {
ret void
}
@@ -56,7 +56,7 @@ define amdgpu_kernel void @limited_occupancy_19() #2 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_24_vgprs() #10 {
+define amdgpu_kernel void @used_24_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v23}" ()
ret void
}
@@ -67,7 +67,7 @@ define amdgpu_kernel void @used_24_vgprs() #10 {
; GFX1010W32: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_28_vgprs() #10 {
+define amdgpu_kernel void @used_28_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v27}" ()
ret void
}
@@ -78,7 +78,7 @@ define amdgpu_kernel void @used_28_vgprs() #10 {
; GFX1010W32: ; Occupancy: 20
; GFX1030W32: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_32_vgprs() #10 {
+define amdgpu_kernel void @used_32_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v31}" ()
ret void
}
@@ -90,7 +90,7 @@ define amdgpu_kernel void @used_32_vgprs() #10 {
; GFX1030W64: ; Occupancy: 12
; GFX1030W32: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_36_vgprs() #10 {
+define amdgpu_kernel void @used_36_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v35}" ()
ret void
}
@@ -101,7 +101,7 @@ define amdgpu_kernel void @used_36_vgprs() #10 {
; GFX1010W32: ; Occupancy: 20
; GFX1030W32: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_40_vgprs() #10 {
+define amdgpu_kernel void @used_40_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v39}" ()
ret void
}
@@ -113,7 +113,7 @@ define amdgpu_kernel void @used_40_vgprs() #10 {
; GFX1030W64: ; Occupancy: 10
; GFX1030W32: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_44_vgprs() #10 {
+define amdgpu_kernel void @used_44_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v43}" ()
ret void
}
@@ -124,7 +124,7 @@ define amdgpu_kernel void @used_44_vgprs() #10 {
; GFX1010W32: ; Occupancy: 20
; GFX1030W32: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_48_vgprs() #10 {
+define amdgpu_kernel void @used_48_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v47}" ()
ret void
}
@@ -136,7 +136,7 @@ define amdgpu_kernel void @used_48_vgprs() #10 {
; GFX1030W32: ; Occupancy: 16
; GFX1100W64: ; Occupancy: 12
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_56_vgprs() #10 {
+define amdgpu_kernel void @used_56_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v55}" ()
ret void
}
@@ -147,7 +147,7 @@ define amdgpu_kernel void @used_56_vgprs() #10 {
; GFX10W32: ; Occupancy: 16
; GFX1100W64: ; Occupancy: 10
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_64_vgprs() #10 {
+define amdgpu_kernel void @used_64_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v63}" ()
ret void
}
@@ -159,7 +159,7 @@ define amdgpu_kernel void @used_64_vgprs() #10 {
; GFX1030W32: ; Occupancy: 12
; GFX1100W64: ; Occupancy: 10
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_72_vgprs() #10 {
+define amdgpu_kernel void @used_72_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v71}" ()
ret void
}
@@ -170,7 +170,7 @@ define amdgpu_kernel void @used_72_vgprs() #10 {
; GFX10W32: ; Occupancy: 12
; GFX1100W64: ; Occupancy: 9
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_80_vgprs() #10 {
+define amdgpu_kernel void @used_80_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v79}" ()
ret void
}
@@ -183,7 +183,7 @@ define amdgpu_kernel void @used_80_vgprs() #10 {
; GFX1030W32: ; Occupancy: 10
; GFX1100W64: ; Occupancy: 9
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_84_vgprs() #10 {
+define amdgpu_kernel void @used_84_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v83}" ()
ret void
}
@@ -195,7 +195,7 @@ define amdgpu_kernel void @used_84_vgprs() #10 {
; GFX1030W32: ; Occupancy: 10
; GFX1100W64: ; Occupancy: 8
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_88_vgprs() #10 {
+define amdgpu_kernel void @used_88_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v87}" ()
ret void
}
@@ -206,7 +206,7 @@ define amdgpu_kernel void @used_88_vgprs() #10 {
; GFX10W32: ; Occupancy: 10
; GFX1100W64: ; Occupancy: 8
; GFX1100W32: ; Occupancy: 16
-define amdgpu_kernel void @used_96_vgprs() #10 {
+define amdgpu_kernel void @used_96_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v95}" ()
ret void
}
@@ -218,7 +218,7 @@ define amdgpu_kernel void @used_96_vgprs() #10 {
; GFX10W32: ; Occupancy: 9
; GFX1100W64: ; Occupancy: 7
; GFX1100W32: ; Occupancy: 12
-define amdgpu_kernel void @used_100_vgprs() #10 {
+define amdgpu_kernel void @used_100_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v99}" ()
ret void
}
@@ -229,7 +229,7 @@ define amdgpu_kernel void @used_100_vgprs() #10 {
; GFX10W32: ; Occupancy: 9
; GFX1100W64: ; Occupancy: 6
; GFX1100W32: ; Occupancy: 12
-define amdgpu_kernel void @used_112_vgprs() #10 {
+define amdgpu_kernel void @used_112_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v111}" ()
ret void
}
@@ -240,7 +240,7 @@ define amdgpu_kernel void @used_112_vgprs() #10 {
; GFX10W32: ; Occupancy: 8
; GFX1100W64: ; Occupancy: 5
; GFX1100W32: ; Occupancy: 10
-define amdgpu_kernel void @used_128_vgprs() #10 {
+define amdgpu_kernel void @used_128_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v127}" ()
ret void
}
@@ -251,7 +251,7 @@ define amdgpu_kernel void @used_128_vgprs() #10 {
; GFX10W32: ; Occupancy: 7
; GFX1100W64: ; Occupancy: 5
; GFX1100W32: ; Occupancy: 10
-define amdgpu_kernel void @used_144_vgprs() #10 {
+define amdgpu_kernel void @used_144_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v143}" ()
ret void
}
@@ -263,7 +263,7 @@ define amdgpu_kernel void @used_144_vgprs() #10 {
; GFX1030W32: ; Occupancy: 5
; GFX1100W64: ; Occupancy: 4
; GFX1100W32: ; Occupancy: 9
-define amdgpu_kernel void @used_168_vgprs() #10 {
+define amdgpu_kernel void @used_168_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v167}" ()
ret void
}
@@ -275,7 +275,7 @@ define amdgpu_kernel void @used_168_vgprs() #10 {
; GFX1030W32: ; Occupancy: 4
; GFX1100W64: ; Occupancy: 3
; GFX1100W32: ; Occupancy: 7
-define amdgpu_kernel void @used_200_vgprs() #10 {
+define amdgpu_kernel void @used_200_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v199}" ()
ret void
}
@@ -286,7 +286,7 @@ define amdgpu_kernel void @used_200_vgprs() #10 {
; GFX10W32: ; Occupancy: 4
; GFX1100W64: ; Occupancy: 2
; GFX1100W32: ; Occupancy: 5
-define amdgpu_kernel void @used_256_vgprs() #10 {
+define amdgpu_kernel void @used_256_vgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{v255}" ()
ret void
}
@@ -296,7 +296,7 @@ define amdgpu_kernel void @used_256_vgprs() #10 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_80_sgprs() #10 {
+define amdgpu_kernel void @used_80_sgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{s79}" ()
ret void
}
@@ -306,7 +306,7 @@ define amdgpu_kernel void @used_80_sgprs() #10 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_88_sgprs() #10 {
+define amdgpu_kernel void @used_88_sgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{s87}" ()
ret void
}
@@ -316,7 +316,7 @@ define amdgpu_kernel void @used_88_sgprs() #10 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_100_sgprs() #10 {
+define amdgpu_kernel void @used_100_sgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{s99}" ()
ret void
}
@@ -326,7 +326,7 @@ define amdgpu_kernel void @used_100_sgprs() #10 {
; GFX1010: ; Occupancy: 20
; GFX1030: ; Occupancy: 16
; GFX1100: ; Occupancy: 16
-define amdgpu_kernel void @used_101_sgprs() #10 {
+define amdgpu_kernel void @used_101_sgprs() "amdgpu-flat-work-group-size"="1,32" {
call void asm sideeffect "", "~{s100}" ()
ret void
}
@@ -374,7 +374,7 @@ define amdgpu_kernel void @used_lds_13112() {
; GFX1100W64: ; Occupancy: 4{{$}}
; GFX1100W32: ; Occupancy: 8{{$}}
@lds8252 = internal addrspace(3) global [8252 x i8] undef, align 4
-define amdgpu_kernel void @used_lds_8252_max_group_size_64() #3 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_64() "amdgpu-flat-work-group-size"="1,64" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -385,7 +385,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_64() #3 {
; GFX10W32: ; Occupancy: 12{{$}}
; GFX1100W64: ; Occupancy: 8{{$}}
; GFX1100W32: ; Occupancy: 12{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_96() #4 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_96() "amdgpu-flat-work-group-size"="1,96" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -396,7 +396,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_96() #4 {
; GFX10W32: ; Occupancy: 15{{$}}
; GFX1100W64: ; Occupancy: 8{{$}}
; GFX1100W32: ; Occupancy: 15{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_128() #5 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_128() "amdgpu-flat-work-group-size"="1,128" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -408,7 +408,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_128() #5 {
; GFX1030W32: ; Occupancy: 15{{$}}
; GFX1100W64: ; Occupancy: 12{{$}}
; GFX1100W32: ; Occupancy: 15{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_192() #6 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_192() "amdgpu-flat-work-group-size"="1,192" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -420,7 +420,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_192() #6 {
; GFX1030W32: ; Occupancy: 16{{$}}
; GFX1100W64: ; Occupancy: 15{{$}}
; GFX1100W32: ; Occupancy: 16{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_256() #7 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_256() "amdgpu-flat-work-group-size"="1,256" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -430,7 +430,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_256() #7 {
; GFX1010: ; Occupancy: 20{{$}}
; GFX1030: ; Occupancy: 16{{$}}
; GFX1100: ; Occupancy: 16{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_512() #8 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_512() "amdgpu-flat-work-group-size"="1,512" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -441,7 +441,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_512() #8 {
; GFX1010W64: ; Occupancy: 20{{$}}
; GFX1030: ; Occupancy: 16{{$}}
; GFX1100: ; Occupancy: 16{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_1024() #9 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_1024() "amdgpu-flat-work-group-size"="1,1024" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
@@ -450,19 +450,7 @@ define amdgpu_kernel void @used_lds_8252_max_group_size_1024() #9 {
; GFX9: ; Occupancy: 2{{$}}
; GFX10: ; Occupancy: 4{{$}}
; GFX1100: ; Occupancy: 4{{$}}
-define amdgpu_kernel void @used_lds_8252_max_group_size_32() #10 {
+define amdgpu_kernel void @used_lds_8252_max_group_size_32() "amdgpu-flat-work-group-size"="1,32" {
store volatile i8 1, ptr addrspace(3) @lds8252
ret void
}
-
-attributes #0 = { "amdgpu-waves-per-eu"="2,3" "amdgpu-flat-work-group-size"="1,64" }
-attributes #1 = { "amdgpu-waves-per-eu"="18,18" "amdgpu-flat-work-group-size"="1,32" }
-attributes #2 = { "amdgpu-waves-per-eu"="19,19" "amdgpu-flat-work-group-size"="1,32" }
-attributes #3 = { "amdgpu-flat-work-group-size"="1,64" }
-attributes #4 = { "amdgpu-flat-work-group-size"="1,96" }
-attributes #5 = { "amdgpu-flat-work-group-size"="1,128" }
-attributes #6 = { "amdgpu-flat-work-group-size"="1,192" }
-attributes #7 = { "amdgpu-flat-work-group-size"="1,256" }
-attributes #8 = { "amdgpu-flat-work-group-size"="1,512" }
-attributes #9 = { "amdgpu-flat-work-group-size"="1,1024" }
-attributes #10 = { "amdgpu-flat-work-group-size"="1,32" }
diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll
index 769d035858ca83..31bc79d945cce8 100644
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11PLUS,GFX12 %s
; IEEE bit enabled for compute kernel, so shouldn't use.
-define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
+define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "no-signed-zeros-fp-math"="false" {
; SI-LABEL: v_omod_div2_f32_enable_ieee_signed_zeros:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -81,7 +81,7 @@ define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_signed_zeros(ptr addrspac
}
; IEEE bit enabled for compute kernel, so shouldn't use.
-define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
+define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "no-signed-zeros-fp-math"="false" {
; SI-LABEL: v_omod_div2_f64_enable_ieee_signed_zeros:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -157,7 +157,7 @@ define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_signed_zeros(ptr addrspac
}
; IEEE bit enabled for compute kernel, so shouldn't use even though nsz is allowed
-define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
+define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f32_enable_ieee_nsz:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -233,7 +233,7 @@ define amdgpu_kernel void @v_omod_div2_f32_enable_ieee_nsz(ptr addrspace(1) %out
}
; IEEE bit enabled for compute kernel, so shouldn't use even though nsz is allowed.
-define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #5 {
+define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_omod_div2_f64_enable_ieee_nsz:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -309,7 +309,7 @@ define amdgpu_kernel void @v_omod_div2_f64_enable_ieee_nsz(ptr addrspace(1) %out
}
; Only allow without IEEE bit if signed zeros are significant.
-define amdgpu_ps void @v_omod_div2_f32_signed_zeros(float %a) #4 {
+define amdgpu_ps void @v_omod_div2_f32_signed_zeros(float %a) nounwind "no-signed-zeros-fp-math"="false" {
; SI-LABEL: v_omod_div2_f32_signed_zeros:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -342,7 +342,7 @@ define amdgpu_ps void @v_omod_div2_f32_signed_zeros(float %a) #4 {
}
; Only allow without IEEE bit if signed zeros are significant.
-define amdgpu_ps void @v_omod_div2_f64_signed_zeros(double %a) #4 {
+define amdgpu_ps void @v_omod_div2_f64_signed_zeros(double %a) nounwind "no-signed-zeros-fp-math"="false" {
; SI-LABEL: v_omod_div2_f64_signed_zeros:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
@@ -384,7 +384,7 @@ define amdgpu_ps void @v_omod_div2_f64_signed_zeros(double %a) #4 {
ret void
}
-define amdgpu_ps void @v_omod_div2_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_div2_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
@@ -412,7 +412,7 @@ define amdgpu_ps void @v_omod_div2_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_div2_f64(double %a) #5 {
+define amdgpu_ps void @v_omod_div2_f64(double %a) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_omod_div2_f64:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 div:2
@@ -448,7 +448,7 @@ define amdgpu_ps void @v_omod_div2_f64(double %a) #5 {
ret void
}
-define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_mul2_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul2_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:2
@@ -476,7 +476,7 @@ define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) #0 {
+define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul2_med3:
; SI: ; %bb.0:
; SI-NEXT: v_med3_f32 v0, v0, v1, v2 mul:2
@@ -512,7 +512,7 @@ define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) #0 {
ret void
}
-define amdgpu_ps void @v_omod_mul2_f64(double %a) #5 {
+define amdgpu_ps void @v_omod_mul2_f64(double %a) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_omod_mul2_f64:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:2
@@ -548,7 +548,7 @@ define amdgpu_ps void @v_omod_mul2_f64(double %a) #5 {
ret void
}
-define amdgpu_ps void @v_omod_mul4_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_mul4_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul4_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
@@ -576,7 +576,7 @@ define amdgpu_ps void @v_omod_mul4_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_mul4_f64(double %a) #5 {
+define amdgpu_ps void @v_omod_mul4_f64(double %a) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" {
; SI-LABEL: v_omod_mul4_f64:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 mul:4
@@ -612,7 +612,7 @@ define amdgpu_ps void @v_omod_mul4_f64(double %a) #5 {
ret void
}
-define amdgpu_ps void @v_omod_mul4_multi_use_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_mul4_multi_use_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul4_multi_use_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -665,7 +665,7 @@ define amdgpu_ps void @v_omod_mul4_multi_use_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_mul4_dbg_use_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_mul4_dbg_use_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul4_dbg_use_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 mul:4
@@ -695,7 +695,7 @@ define amdgpu_ps void @v_omod_mul4_dbg_use_f32(float %a) #0 {
}
; Clamp is applied after omod, folding both into instruction is OK.
-define amdgpu_ps void @v_clamp_omod_div2_f32(float %a) #0 {
+define amdgpu_ps void @v_clamp_omod_div2_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_clamp_omod_div2_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp div:2
@@ -727,7 +727,7 @@ define amdgpu_ps void @v_clamp_omod_div2_f32(float %a) #0 {
}
; Cannot fold omod into clamp
-define amdgpu_ps void @v_omod_div2_clamp_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_div2_clamp_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_clamp_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 clamp
@@ -761,7 +761,7 @@ define amdgpu_ps void @v_omod_div2_clamp_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_div2_abs_src_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_div2_abs_src_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_abs_src_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -794,7 +794,7 @@ define amdgpu_ps void @v_omod_div2_abs_src_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_add_self_clamp_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_add_self_clamp_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_add_self_clamp_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, v0 clamp
@@ -823,7 +823,7 @@ define amdgpu_ps void @v_omod_add_self_clamp_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_add_clamp_self_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_add_clamp_self_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_add_clamp_self_f32:
; SI: ; %bb.0:
; SI-NEXT: v_max_f32_e64 v0, v0, v0 clamp
@@ -866,7 +866,7 @@ define amdgpu_ps void @v_omod_add_clamp_self_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_add_abs_self_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_add_abs_self_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_add_abs_self_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -899,7 +899,7 @@ define amdgpu_ps void @v_omod_add_abs_self_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_add_abs_x_x_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_add_abs_x_x_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_add_abs_x_x_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -932,7 +932,7 @@ define amdgpu_ps void @v_omod_add_abs_x_x_f32(float %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_add_x_abs_x_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_add_x_abs_x_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_add_x_abs_x_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -966,7 +966,7 @@ define amdgpu_ps void @v_omod_add_x_abs_x_f32(float %a) #0 {
}
; Don't fold omod into omod into another omod.
-define amdgpu_ps void @v_omod_div2_omod_div2_f32(float %a) #0 {
+define amdgpu_ps void @v_omod_div2_omod_div2_f32(float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_omod_div2_f32:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e64 v0, v0, 1.0 div:2
@@ -1000,7 +1000,7 @@ define amdgpu_ps void @v_omod_div2_omod_div2_f32(float %a) #0 {
}
; Don't fold omod if denorms enabled
-define amdgpu_ps void @v_omod_div2_f32_denormals(float %a) #2 {
+define amdgpu_ps void @v_omod_div2_f32_denormals(float %a) nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f32_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -1033,7 +1033,7 @@ define amdgpu_ps void @v_omod_div2_f32_denormals(float %a) #2 {
}
; Don't fold omod if denorms enabled.
-define amdgpu_ps void @v_omod_div2_f64_denormals(double %a) #6 {
+define amdgpu_ps void @v_omod_div2_f64_denormals(double %a) nounwind "denormal-fp-math"="ieee,ieee" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f64_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
@@ -1076,7 +1076,7 @@ define amdgpu_ps void @v_omod_div2_f64_denormals(double %a) #6 {
}
; Don't fold omod if denorms enabled for add form.
-define amdgpu_ps void @v_omod_mul2_f32_denormals(float %a) #2 {
+define amdgpu_ps void @v_omod_mul2_f32_denormals(float %a) nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul2_f32_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -1109,7 +1109,7 @@ define amdgpu_ps void @v_omod_mul2_f32_denormals(float %a) #2 {
}
; Don't fold omod if denorms enabled for add form.
-define amdgpu_ps void @v_omod_mul2_f64_denormals(double %a) #2 {
+define amdgpu_ps void @v_omod_mul2_f64_denormals(double %a) nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul2_f64_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_add_f64 v[0:1], v[0:1], 1.0
@@ -1152,7 +1152,7 @@ define amdgpu_ps void @v_omod_mul2_f64_denormals(double %a) #2 {
}
; Don't fold omod if denorms enabled
-define amdgpu_ps void @v_omod_div2_f16_denormals(half %a) #0 {
+define amdgpu_ps void @v_omod_div2_f16_denormals(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f16_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -1187,7 +1187,7 @@ define amdgpu_ps void @v_omod_div2_f16_denormals(half %a) #0 {
}
; Don't fold omod if denorms enabled for add form.
-define amdgpu_ps void @v_omod_mul2_f16_denormals(half %a) #0 {
+define amdgpu_ps void @v_omod_mul2_f16_denormals(half %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mul2_f16_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -1221,7 +1221,7 @@ define amdgpu_ps void @v_omod_mul2_f16_denormals(half %a) #0 {
ret void
}
-define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
+define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_div2_f16_no_denormals:
; SI: ; %bb.0:
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -1252,7 +1252,7 @@ define amdgpu_ps void @v_omod_div2_f16_no_denormals(half %a) #3 {
ret void
}
-define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
+define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" {
; SI-LABEL: v_omod_mac_to_mad:
; SI: ; %bb.0:
; SI-NEXT: v_mad_f32 v1, v1, v1, v0 mul:2
@@ -1287,27 +1287,19 @@ define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
-attributes #3 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
-attributes #4 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #5 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #6 = { nounwind "denormal-fp-math"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
+declare double @llvm.fabs.f64(double) nounwind readnone
+declare double @llvm.minnum.f64(double, double) nounwind readnone
+declare double @llvm.maxnum.f64(double, double) nounwind readnone
+declare half @llvm.fabs.f16(half) nounwind readnone
+declare half @llvm.minnum.f16(half, half) nounwind readnone
+declare half @llvm.maxnum.f16(half, half) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/opencl-image-metadata.ll b/llvm/test/CodeGen/AMDGPU/opencl-image-metadata.ll
index 9dcb9b11493ace..0846dd07eed11d 100644
--- a/llvm/test/CodeGen/AMDGPU/opencl-image-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/opencl-image-metadata.ll
@@ -12,8 +12,6 @@ entry:
ret void
}
-attributes #3 = { nounwind }
-
!opencl.kernels = !{!0}
!0 = !{ptr @kernel, !1, !2, !3, !4, !5}
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index 1e3299a0237367..97a9a834bde4cd 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -3,7 +3,7 @@
; CHECK-LABEL: {{^}}fold_sgpr:
; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
-define amdgpu_kernel void @fold_sgpr(ptr addrspace(1) %out, i32 %fold) #1 {
+define amdgpu_kernel void @fold_sgpr(ptr addrspace(1) %out, i32 %fold) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%tmp0 = icmp ne i32 %fold, 0
br i1 %tmp0, label %if, label %endif
@@ -21,7 +21,7 @@ endif:
; CHECK-LABEL: {{^}}fold_imm:
; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
-define amdgpu_kernel void @fold_imm(ptr addrspace(1) %out, i32 %cmp) #1 {
+define amdgpu_kernel void @fold_imm(ptr addrspace(1) %out, i32 %cmp) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%fold = add i32 3, 2
%tmp0 = icmp ne i32 %cmp, 0
@@ -47,7 +47,7 @@ endif:
; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
; CHECK: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]],
-define amdgpu_kernel void @fold_64bit_constant_add(ptr addrspace(1) %out, i32 %cmp, i64 %val) #1 {
+define amdgpu_kernel void @fold_64bit_constant_add(ptr addrspace(1) %out, i32 %cmp, i64 %val) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%tmp0 = add i64 %val, 1
store i64 %tmp0, ptr addrspace(1) %out
@@ -62,7 +62,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
-define amdgpu_kernel void @vector_inline(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @vector_inline(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -81,7 +81,7 @@ entry:
; CHECK-LABEL: {{^}}imm_one_use:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
-define amdgpu_kernel void @imm_one_use(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @imm_one_use(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = xor i32 %tmp0, 100
@@ -94,7 +94,7 @@ entry:
; CHECK: v_xor_b32_e32 v{{[0-9]}}, 0x64, v{{[0-9]}}
; CHECK: v_xor_b32_e32 v{{[0-9]}}, 0x64, v{{[0-9]}}
-define amdgpu_kernel void @vector_imm(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @vector_imm(ptr addrspace(1) %out) nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp0, 1
@@ -114,7 +114,7 @@ entry:
; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; CHECK: v_madmk_f32 v[[RES:[0-9]+]], v[[HI]], 0x41200000, v[[LO]]
; CHECK: buffer_store_dword v[[RES]]
-define amdgpu_kernel void @no_fold_tied_subregister() #1 {
+define amdgpu_kernel void @no_fold_tied_subregister() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%tmp1 = load volatile <2 x float>, ptr addrspace(1) undef
%tmp2 = extractelement <2 x float> %tmp1, i32 0
%tmp3 = extractelement <2 x float> %tmp1, i32 1
@@ -128,7 +128,7 @@ define amdgpu_kernel void @no_fold_tied_subregister() #1 {
; CHECK-LABEL: {{^}}no_extra_fold_on_same_opnd
; CHECK-NOT: %bb.1:
; CHECK: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define void @no_extra_fold_on_same_opnd() #1 {
+define void @no_extra_fold_on_same_opnd() nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
entry:
%s0 = load i32, ptr addrspace(5) undef, align 4
%s0.i64= zext i32 %s0 to i64
@@ -148,7 +148,4 @@ if.else:
unreachable
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll b/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
index d1469ed6c67432..3b7a721d7bc696 100644
--- a/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
@@ -2,7 +2,7 @@
; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s
; Testcase which happened to trigger a liveness verifier error
-define amdgpu_kernel void @test_long_add4(<4 x i64> %arg) #0 {
+define amdgpu_kernel void @test_long_add4(<4 x i64> %arg) noinline optnone {
; CHECK-LABEL: test_long_add4:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[4:5], 0
@@ -99,5 +99,3 @@ entry:
store <4 x i64> %add, ptr addrspace(1) null, align 32
ret void
}
-
-attributes #0 = { noinline optnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
index e21b93a386c3e7..fe364027de199b 100644
--- a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
-define amdgpu_kernel void @s_pack_v2f16(ptr addrspace(4) %in0, ptr addrspace(4) %in1) #0 {
+define amdgpu_kernel void @s_pack_v2f16(ptr addrspace(4) %in0, ptr addrspace(4) %in1) nounwind {
; GFX9-LABEL: s_pack_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -57,11 +57,11 @@ define amdgpu_kernel void @s_pack_v2f16(ptr addrspace(4) %in0, ptr addrspace(4)
%vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @s_pack_v2f16_imm_lo(ptr addrspace(4) %in1) #0 {
+define amdgpu_kernel void @s_pack_v2f16_imm_lo(ptr addrspace(4) %in1) nounwind {
; GFX9-LABEL: s_pack_v2f16_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -106,11 +106,11 @@ define amdgpu_kernel void @s_pack_v2f16_imm_lo(ptr addrspace(4) %in1) #0 {
%vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @s_pack_v2f16_imm_hi(ptr addrspace(4) %in0) #0 {
+define amdgpu_kernel void @s_pack_v2f16_imm_hi(ptr addrspace(4) %in0) nounwind {
; GFX9-LABEL: s_pack_v2f16_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -155,11 +155,11 @@ define amdgpu_kernel void @s_pack_v2f16_imm_hi(ptr addrspace(4) %in0) #0 {
%vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -233,11 +233,11 @@ define amdgpu_kernel void @v_pack_v2f16(ptr addrspace(1) %in0, ptr addrspace(1)
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_user(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_user(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2f16_user:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @v_pack_v2f16_user(ptr addrspace(1) %in0, ptr addrspac
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_imm_lo(ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_imm_lo(ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2f16_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -379,11 +379,11 @@ define amdgpu_kernel void @v_pack_v2f16_imm_lo(ptr addrspace(1) %in1) #0 {
%vec.0 = insertelement <2 x half> undef, half 0xH1234, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2f16_inline_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -441,11 +441,11 @@ define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(ptr addrspace(1) %in1) #0
%vec.0 = insertelement <2 x half> undef, half 4.0, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_imm_hi(ptr addrspace(1) %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_imm_hi(ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_pack_v2f16_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -503,11 +503,11 @@ define amdgpu_kernel void @v_pack_v2f16_imm_hi(ptr addrspace(1) %in0) #0 {
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half 0xH1234, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(ptr addrspace(1) %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_pack_v2f16_inline_f16imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -565,11 +565,11 @@ define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(ptr addrspace(1) %in0)
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half 1.0, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(ptr addrspace(1) %in0) #0 {
+define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_pack_v2f16_inline_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -626,11 +626,8 @@ define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(ptr addrspace(1) %in0) #0
%vec.0 = insertelement <2 x half> undef, half %lo, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half 0xH0040, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
index 4b21493bd7ca66..82e011fa6f8c46 100644
--- a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
-define amdgpu_kernel void @s_pack_v2i16(ptr addrspace(4) %in0, ptr addrspace(4) %in1) #0 {
+define amdgpu_kernel void @s_pack_v2i16(ptr addrspace(4) %in0, ptr addrspace(4) %in1) nounwind {
; GFX9-LABEL: s_pack_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -55,11 +55,11 @@ define amdgpu_kernel void @s_pack_v2i16(ptr addrspace(4) %in0, ptr addrspace(4)
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @s_pack_v2i16_imm_lo(ptr addrspace(4) %in1) #0 {
+define amdgpu_kernel void @s_pack_v2i16_imm_lo(ptr addrspace(4) %in1) nounwind {
; GFX9-LABEL: s_pack_v2i16_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -103,11 +103,11 @@ define amdgpu_kernel void @s_pack_v2i16_imm_lo(ptr addrspace(4) %in1) #0 {
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @s_pack_v2i16_imm_hi(ptr addrspace(4) %in0) #0 {
+define amdgpu_kernel void @s_pack_v2i16_imm_hi(ptr addrspace(4) %in0) nounwind {
; GFX9-LABEL: s_pack_v2i16_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -151,11 +151,11 @@ define amdgpu_kernel void @s_pack_v2i16_imm_hi(ptr addrspace(4) %in0) #0 {
%vec.1 = insertelement <2 x i16> %vec.0, i16 456, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "s"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -227,11 +227,11 @@ define amdgpu_kernel void @v_pack_v2i16(ptr addrspace(1) %in0, ptr addrspace(1)
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2i16_user(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_user(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2i16_user:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -313,7 +313,7 @@ define amdgpu_kernel void @v_pack_v2i16_user(ptr addrspace(1) %in0, ptr addrspac
ret void
}
-define amdgpu_kernel void @v_pack_v2i16_imm_lo(ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_imm_lo(ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2i16_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -370,11 +370,11 @@ define amdgpu_kernel void @v_pack_v2i16_imm_lo(ptr addrspace(1) %in1) #0 {
%vec.0 = insertelement <2 x i16> undef, i16 123, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_pack_v2i16_inline_imm_lo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -430,11 +430,11 @@ define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(ptr addrspace(1) %in1) #0
%vec.0 = insertelement <2 x i16> undef, i16 64, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2i16_imm_hi(ptr addrspace(1) %in0) #0 {
+define amdgpu_kernel void @v_pack_v2i16_imm_hi(ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_pack_v2i16_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -491,11 +491,11 @@ define amdgpu_kernel void @v_pack_v2i16_imm_hi(ptr addrspace(1) %in0) #0 {
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 123, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(ptr addrspace(1) %in0) #0 {
+define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_pack_v2i16_inline_imm_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -551,12 +551,9 @@ define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(ptr addrspace(1) %in0) #0
%vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
%vec.1 = insertelement <2 x i16> %vec.0, i16 7, i32 1
%vec.i32 = bitcast <2 x i16> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
index 4d6adc7cc94171..503a685872c9b9 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
@@ -11,7 +11,7 @@
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}}
-define amdgpu_kernel void @fma_vector_vector_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -39,7 +39,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_broadcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_broadcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -68,7 +68,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -97,7 +97,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_broadcast_neg_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_broadcast_neg_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -127,7 +127,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_scalar_neg_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_scalar_neg_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -155,7 +155,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_scalar_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_scalar_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
@@ -182,7 +182,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_add_u16 v{{[0-9]+}}, [[VEC0]], [[SCALAR0]] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
-define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4
%scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2
@@ -209,7 +209,7 @@ bb:
; GCN: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[SCALAR1]], 16, [[SCALAR0]]
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]]{{$}}
-define amdgpu_kernel void @fma_vector_vector_scalar_lo_neg_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_scalar_lo_neg_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%arg2.gep = getelementptr inbounds half, ptr addrspace(3) %arg2, i32 2
@@ -237,7 +237,7 @@ bb:
; GCN: ds_read_u16_d16_hi [[PACKED]]
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%arg2.gep = getelementptr inbounds half, ptr addrspace(3) %arg2, i32 2
@@ -268,7 +268,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_vector_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_vector_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -296,7 +296,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_vector_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_vector_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -324,7 +324,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_add_u16 v{{[0-9]+}}, [[VEC0]], [[VEC1]] op_sel:[0,1]{{$}}
-define amdgpu_kernel void @add_vector_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @add_vector_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x i16>, ptr addrspace(3) %lds, i32 1
@@ -349,7 +349,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -377,7 +377,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]]{{$}}
-define amdgpu_kernel void @fma_vector_vector_neg_vector_lo_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_neg_vector_lo_neg_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -407,7 +407,7 @@ bb:
; GCN-NOT: or
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}}
-define amdgpu_kernel void @fma_vector_vector_swap_vector(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_swap_vector(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -435,7 +435,7 @@ bb:
; GCN-NOT: xor
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_swap_neg_vector(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_swap_neg_vector(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -464,7 +464,7 @@ bb:
; GCN-NOT: xor
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_0(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_0(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -492,7 +492,7 @@ bb:
; GCN-NOT: xor
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_lo:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_1(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_1(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -520,7 +520,7 @@ bb:
; GCN-NOT: xor
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_hi:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_2(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -548,7 +548,7 @@ bb:
; GCN-NOT: xor
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] neg_lo:[0,0,1]{{$}}
-define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_3(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_3(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -566,7 +566,7 @@ bb:
; GCN-LABEL: {{^}}bitcast_fneg_f32:
; GCN: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}}
-define amdgpu_kernel void @bitcast_fneg_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @bitcast_fneg_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%vec0 = load volatile <2 x half>, ptr addrspace(3) %lds, align 4
%f32 = load volatile float, ptr addrspace(3) undef, align 4
@@ -580,7 +580,7 @@ bb:
; GCN-LABEL: {{^}}shuffle_bitcast_fneg_f32:
; GCN: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} op_sel:[0,1] op_sel_hi:[1,0]{{$}}
-define amdgpu_kernel void @shuffle_bitcast_fneg_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @shuffle_bitcast_fneg_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%vec0 = load volatile <2 x half>, ptr addrspace(3) %lds, align 4
@@ -596,7 +596,7 @@ bb:
; GCN-LABEL: {{^}}extract_from_i64:
; GCN: v_lshl_or_b32
; GCN: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}}
-define amdgpu_kernel void @extract_from_i64(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @extract_from_i64(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4
%i64 = load volatile i64, ptr addrspace(1) undef
@@ -626,7 +626,7 @@ bb:
; GCN: v_pk_add_f16 [[FADD:v[0-9]+]]
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[FADD]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}}
-define amdgpu_kernel void @bitcast_lo_elt_op_sel(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @bitcast_lo_elt_op_sel(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -661,7 +661,7 @@ bb:
; GCN: v_pk_add_f16 [[FADD:v[0-9]+]]
; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[FADD]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}}
-define amdgpu_kernel void @mix_elt_types_op_sel(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
+define amdgpu_kernel void @mix_elt_types_op_sel(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
bb:
%lds.gep1 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 1
%lds.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(3) %lds, i32 2
@@ -685,7 +685,4 @@ bb:
ret void
}
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
index 538ce15979de88..6e7cf19adf2551 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll
@@ -161,17 +161,17 @@
; CHECK-NEXT:...
; CHECK-NEXT: .end_amdgpu_pal_metadata
-declare amdgpu_gfx float @extern_func(float) #0
-declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
+declare amdgpu_gfx float @extern_func(float) nounwind
+declare amdgpu_gfx float @extern_func_many_args(<64 x float>) nounwind
@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4
-define amdgpu_gfx float @no_stack(float %arg0) #0 {
+define amdgpu_gfx float @no_stack(float %arg0) nounwind {
%add = fadd float %arg0, 1.0
ret float %add
}
-define amdgpu_gfx float @simple_stack(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -179,7 +179,7 @@ define amdgpu_gfx float @simple_stack(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
+define amdgpu_gfx float @multiple_stack(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -191,7 +191,7 @@ define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
ret float %add2
}
-define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
+define amdgpu_gfx float @dynamic_stack(float %arg0) nounwind {
bb0:
%cmp = fcmp ogt float %arg0, 0.0
br i1 %cmp, label %bb1, label %bb2
@@ -208,7 +208,7 @@ bb2:
ret float %res
}
-define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
+define amdgpu_gfx float @dynamic_stack_loop(float %arg0) nounwind {
bb0:
br label %bb1
@@ -226,12 +226,12 @@ bb2:
ret float %add
}
-define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_call(float %arg0) nounwind {
%res = call amdgpu_gfx float @simple_stack(float %arg0)
ret float %res
}
-define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -240,12 +240,12 @@ define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_extern_call(float %arg0) nounwind {
%res = call amdgpu_gfx float @extern_func(float %arg0)
ret float %res
}
-define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_extern_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -254,18 +254,18 @@ define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
+define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) nounwind {
%res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
ret float %res
}
-define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
+define amdgpu_gfx float @no_stack_indirect_call(float %arg0) nounwind {
%fptr = load ptr, ptr addrspace(4) @funcptr
call amdgpu_gfx void %fptr()
ret float %arg0
}
-define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -275,7 +275,7 @@ define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
ret float %add
}
-define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
+define amdgpu_gfx float @simple_stack_recurse(float %arg0) nounwind {
%stack = alloca float, i32 4, align 4, addrspace(5)
store volatile float 2.0, ptr addrspace(5) %stack
%val = load volatile float, ptr addrspace(5) %stack
@@ -286,19 +286,17 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
@lds = internal addrspace(3) global [64 x float] undef
-define amdgpu_gfx float @simple_lds(float %arg0) #0 {
+define amdgpu_gfx float @simple_lds(float %arg0) nounwind {
%val = load float, ptr addrspace(3) @lds
ret float %val
}
-define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
+define amdgpu_gfx float @simple_lds_recurse(float %arg0) nounwind {
%val = load float, ptr addrspace(3) @lds
%res = call amdgpu_gfx float @simple_lds_recurse(float %val)
ret float %res
}
-attributes #0 = { nounwind }
-
!amdgpu.pal.metadata.msgpack = !{!0}
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index cabc11037017e5..48f1141533362e 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -124,7 +124,7 @@
; CHECK-NEXT:...
; CHECK-NEXT: .end_amdgpu_pal_metadata
-define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 {
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode" !lgc.shaderstage !1 {
.entry:
%i = call i64 @llvm.amdgcn.s.getpc()
%i1 = and i64 %i, -4294967296
@@ -141,14 +141,14 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0
ret void
}
-define dllexport amdgpu_ps void @ps_shader() #1 {
+define dllexport amdgpu_ps void @ps_shader() nounwind memory(readwrite) "InitialPSInputAddr"="36983" {
ret void
}
!amdgpu.pal.metadata.msgpack = !{!0}
; Function Attrs: nounwind willreturn memory(none)
-declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1
+declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) nounwind memory(readwrite) "InitialPSInputAddr"="36983"
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.amdgcn.s.getpc() #2
@@ -156,9 +156,5 @@ declare i64 @llvm.amdgcn.s.getpc() #2
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3
-attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode" }
-
-attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" }
-
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
!1 = !{i32 7}
diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
index 2e9f09ad41813d..97992c6b135386 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
@@ -64,9 +64,7 @@ define amdgpu_cs void @test_simple_indirect_call() {
}
; Function Attrs: nounwind readnone speculatable willreturn
-declare i64 @llvm.amdgcn.s.getpc() #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i64 @llvm.amdgcn.s.getpc() nounwind readnone speculatable willreturn
;.
; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 45fbaaabc65b58..0444495fe569e1 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -5,7 +5,7 @@
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=prologepilog -verify-machineinstrs < %s | FileCheck -check-prefix=PEI-GFX90A %s
; Partial reg copy and spill missed during regalloc handled later at frame lowering.
-define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
+define amdgpu_kernel void @partial_copy(<4 x i32> %arg) nounwind "amdgpu-num-vgpr"="5" {
; REGALLOC-GFX908-LABEL: name: partial_copy
; REGALLOC-GFX908: bb.0 (%ir-block.0):
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
@@ -110,5 +110,3 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
}
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 5b0354e63c2365..508ba4ad5b4a66 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -8,7 +8,7 @@
; Ideally we only need 2 VGPRs for all spilling. The VGPRs are
; allocated per-frame index, so it's possible to get up with more.
-define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) nounwind {
; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
@@ -435,44 +435,44 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
- %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr5 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr6 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr7 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr8 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr9 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr10 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr11 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr12 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr13 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr14 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr15 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr16 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) #0
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr4) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr5) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr6) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr7) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr8) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr9) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr10) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr11) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr12) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr13) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr14) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr15) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr16) nounwind
br label %ret
ret:
@@ -481,7 +481,7 @@ ret:
; Some of the lanes of an SGPR spill are in one VGPR and some forced
; into the next available VGPR.
-define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
+define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="8,8" {
; GCN-LABEL: split_sgpr_spill_2_vgprs:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
@@ -706,23 +706,23 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
- %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) nounwind
br label %ret
ret:
@@ -732,7 +732,7 @@ ret:
; The first 64 SGPR spills can go to a VGPR, but there isn't a second
; so some spills must be to memory. The last 16 element spill runs out
; of lanes at the 15th element.
-define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
+define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="8,8" {
; GCN-LABEL: no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
@@ -955,27 +955,27 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "", "~{v[8:15]}" () #0
- call void asm sideeffect "", "~{v[16:23]}" () #0
- call void asm sideeffect "", "~{v[24:27]}"() #0
- call void asm sideeffect "", "~{v[28:29]}"() #0
- call void asm sideeffect "", "~{v30}"() #0
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "", "~{v[8:15]}" () nounwind
+ call void asm sideeffect "", "~{v[16:23]}" () nounwind
+ call void asm sideeffect "", "~{v[24:27]}"() nounwind
+ call void asm sideeffect "", "~{v[28:29]}"() nounwind
+ call void asm sideeffect "", "~{v30}"() nounwind
- %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) nounwind
br label %ret
ret:
@@ -984,7 +984,7 @@ ret:
; Same as @no_vgprs_last_sgpr_spill, some SGPR spills must go to memory.
; Additionally, v0 is live throughout the function.
-define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
+define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) nounwind "amdgpu-waves-per-eu"="8,8" {
; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
@@ -1213,37 +1213,34 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "", "~{v[8:15]}" () #0
- call void asm sideeffect "", "~{v[16:23]}" () #0
- call void asm sideeffect "", "~{v[24:27]}"() #0
- call void asm sideeffect "", "~{v[28:29]}"() #0
- call void asm sideeffect "", "~{v30}"() #0
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "", "~{v[8:15]}" () nounwind
+ call void asm sideeffect "", "~{v[16:23]}" () nounwind
+ call void asm sideeffect "", "~{v[24:27]}"() nounwind
+ call void asm sideeffect "", "~{v[28:29]}"() nounwind
+ call void asm sideeffect "", "~{v30}"() nounwind
- %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
- call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
+ %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) nounwind
+ call void asm sideeffect "; use $0", "v"(i32 %vgpr0) nounwind
br label %ret
ret:
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll b/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
index 70f4f9688e986f..0f24a382272b20 100644
--- a/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
+++ b/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
@@ -8,10 +8,10 @@
; During live interval construction, the first sub register def is
; incorrectly marked as dead.
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @dead_def_subregister(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @dead_def_subregister(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%val = load i64, ptr addrspace(1) %in.gep
@@ -23,6 +23,3 @@ define amdgpu_kernel void @dead_def_subregister(ptr addrspace(1) noalias %out, p
store i32 %elt1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index 8eddc9a5afd50c..75b64a33f77a5e 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -4,45 +4,40 @@
--- |
- define void @test_spill_v2_partial_agpr() #1 {
+ define void @test_spill_v2_partial_agpr() nounwind "amdgpu-num-vgpr"="1" {
entry:
unreachable
}
- define void @test_spill_v3_partial_agpr() #1 {
+ define void @test_spill_v3_partial_agpr() nounwind "amdgpu-num-vgpr"="1" {
entry:
unreachable
}
- define void @test_spill_v4_partial_agpr() #2 {
+ define void @test_spill_v4_partial_agpr() nounwind "amdgpu-num-vgpr"="3" {
entry:
unreachable
}
- define void @test_spill_v5_partial_agpr() #2 {
+ define void @test_spill_v5_partial_agpr() nounwind "amdgpu-num-vgpr"="3" {
entry:
unreachable
}
- define void @test_spill_v6_partial_agpr() #4 {
+ define void @test_spill_v6_partial_agpr() nounwind "amdgpu-num-vgpr"="5" {
entry:
unreachable
}
- define void @test_spill_v8_partial_agpr() #3 {
+ define void @test_spill_v8_partial_agpr() nounwind "amdgpu-num-vgpr"="4" {
entry:
unreachable
}
- define void @test_spill_v16_partial_agpr() #4 {
+ define void @test_spill_v16_partial_agpr() nounwind "amdgpu-num-vgpr"="5" {
entry:
unreachable
}
-
- attributes #1 = { nounwind "amdgpu-num-vgpr"="1" }
- attributes #2 = { nounwind "amdgpu-num-vgpr"="3" }
- attributes #3 = { nounwind "amdgpu-num-vgpr"="4" }
- attributes #4 = { nounwind "amdgpu-num-vgpr"="5" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
index 4794c296215253..aeb8a139b6811c 100644
--- a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
@@ -32,11 +32,8 @@ define amdgpu_kernel void @dbg_clause(ptr addrspace(1) %out, ptr addrspace(1) %a
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind readnone speculatable willreturn }
-attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) nofree nosync nounwind readnone speculatable willreturn
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll b/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
index 9e2778bb3e71ab..19b44086e33dc1 100644
--- a/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
+++ b/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
@@ -21,6 +21,5 @@ ENDIF: ; preds = %IF, %main_body
ret void
}
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll
index b9a0124fce225f..cbfcd75dea242c 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-IR-lowering.ll
@@ -4,7 +4,7 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=3 -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-attributor -amdgpu-lower-kernel-arguments -amdgpu-kernarg-preload-count=8 -S < %s | FileCheck -check-prefix=PRELOAD-8 %s
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_2(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_2(ptr addrspace(1) %in, ptr addrspace(1) %out) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_2
; NO-PRELOAD-SAME: (ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -44,7 +44,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_2(ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_4(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %out, ptr addrspace(1) %out1) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_4(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %out, ptr addrspace(1) %out1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_4
; NO-PRELOAD-SAME: (ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_4_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -104,7 +104,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_4(ptr addrspace(1) %i
ret void
}
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_8(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3, ptr addrspace(1) %out, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_8(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3, ptr addrspace(1) %out, ptr addrspace(1) %out1, ptr addrspace(1) %out2, ptr addrspace(1) %out3) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_8
; NO-PRELOAD-SAME: (ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]], ptr addrspace(1) [[IN3:%.*]], ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT1:%.*]], ptr addrspace(1) [[OUT2:%.*]], ptr addrspace(1) [[OUT3:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -212,7 +212,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_8(ptr addrspace(1) %i
; Preload args with inreg in the NO-PRELOAD case.
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) inreg %out, ptr addrspace(1) inreg %out1) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset(ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) inreg %out, ptr addrspace(1) inreg %out1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_4_inreg_offset
; NO-PRELOAD-SAME: (ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) inreg [[OUT:%.*]], ptr addrspace(1) inreg [[OUT1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_4_INREG_OFFSET_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -272,7 +272,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset(ptr ad
; Only preload the first sequence of arguments with the inreg attribute. In the NO-PRELOAD case this is just the first argument.
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset_two_sequence(ptr addrspace(1) inreg %in, ptr addrspace(1) %in1, ptr addrspace(1) inreg %out, ptr addrspace(1) inreg %out1) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset_two_sequence(ptr addrspace(1) inreg %in, ptr addrspace(1) %in1, ptr addrspace(1) inreg %out, ptr addrspace(1) inreg %out1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_4_inreg_offset_two_sequence
; NO-PRELOAD-SAME: (ptr addrspace(1) inreg [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) inreg [[OUT:%.*]], ptr addrspace(1) inreg [[OUT1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_4_INREG_OFFSET_TWO_SEQUENCE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -328,7 +328,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_inreg_offset_two_se
ret void
}
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_misaligned(i16 %arg0, ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %out, ptr addrspace(1) %out1) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_misaligned(i16 %arg0, ptr addrspace(1) %in, ptr addrspace(1) %in1, ptr addrspace(1) %out, ptr addrspace(1) %out1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_4_misaligned
; NO-PRELOAD-SAME: (i16 [[ARG0:%.*]], ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_4_MISALIGNED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -407,7 +407,7 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_misaligned(i16 %arg
; In this case both i16 args with be preloaded into the first SGPR.
-define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_i16_i16(i16 %arg0, i16 %arg1, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_i16_i16(i16 %arg0, i16 %arg1, ptr addrspace(1) %out) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_IR_lowering_kernel_4_i16_i16
; NO-PRELOAD-SAME: (i16 [[ARG0:%.*]], i16 [[ARG1:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: [[TEST_PRELOAD_IR_LOWERING_KERNEL_4_I16_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -465,5 +465,3 @@ define amdgpu_kernel void @test_preload_IR_lowering_kernel_4_i16_i16(i16 %arg0,
store i32 %add, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
index 20edbd6c0d0fa6..fe622272b2bf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
@@ -5,7 +5,7 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -passes=amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -passes=amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-20 %s
-define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-PRELOAD-NEXT: ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 {
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: ret void
@@ -53,7 +53,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 {
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: ret void
@@ -77,7 +77,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: ret void
@@ -101,7 +101,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x floa
ret void
}
-define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 {
+define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
; NO-PRELOAD-NEXT: ret void
@@ -125,7 +125,7 @@ define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 {
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
; NO-PRELOAD-NEXT: call void @func(ptr [[TMP0]])
@@ -155,7 +155,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 {
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
; NO-PRELOAD-SAME: (i16 [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
; NO-PRELOAD-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
@@ -185,7 +185,7 @@ define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0
ret void
}
-define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 {
+define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: ret void
@@ -209,7 +209,7 @@ define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 {
ret void
}
-define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) #0 {
+define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) nounwind {
; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
; NO-PRELOAD-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
; NO-PRELOAD-NEXT: ret void
@@ -257,7 +257,5 @@ define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr
ret void
}
-declare void @func(ptr) #0
+declare void @func(ptr) nounwind
declare void @llvm.amdgcn.set.prio(i16)
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 1be041c8dc9b0b..6cdf2dbdc0acad 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -6,7 +6,7 @@
; and introduce a copy. The copy should be of whole-wave with exec mask manipulation around it.
; FIXME: The destination register involved in the whole-wave copy should be considered for preserving all the lanes
; with a spill/restore at function prolog/epilog. The copy might otherwise clobber its inactive lanes unwantedly.
-define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
+define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40" {
; GFX906-LABEL: preserve_wwm_copy_dstreg:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -827,7 +827,5 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
declare void @foo()
-attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"}
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/printf_builtin.ll b/llvm/test/CodeGen/AMDGPU/printf_builtin.ll
index 36c1f2acbb7edc..62f659c12405bd 100644
--- a/llvm/test/CodeGen/AMDGPU/printf_builtin.ll
+++ b/llvm/test/CodeGen/AMDGPU/printf_builtin.ll
@@ -35,11 +35,8 @@ define void @caller_builtin(i32 %n) {
; GCN-NEXT: ret void
;
entry:
- %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) #0
+ %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) builtin
ret void
}
-declare i32 @printf(ptr, ...) #1
-
-attributes #0 = { builtin }
-attributes #1 = { nobuiltin }
+declare i32 @printf(ptr, ...) nobuiltin
diff --git a/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll b/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll
index 8dbb24480bef6f..50129a11de6a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll
+++ b/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll
@@ -11,7 +11,7 @@ define void @caller(i32 %n) {
; GCN-NEXT: ret void
;
entry:
- %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) #0
+ %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) nobuiltin
ret void
}
@@ -40,5 +40,3 @@ entry:
}
declare i32 @printf(ptr, ...)
-
-attributes #0 = { nobuiltin }
diff --git a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
index f1e5e68927be71..b1015fadd714f9 100644
--- a/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-access-no-objects.ll
@@ -17,7 +17,7 @@
; OPTNONE-NOT: s_mov_b32
; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
-define amdgpu_kernel void @store_to_undef() #0 {
+define amdgpu_kernel void @store_to_undef() nounwind {
store volatile i32 0, ptr addrspace(5) undef
ret void
}
@@ -26,7 +26,7 @@ define amdgpu_kernel void @store_to_undef() #0 {
; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
; OPT: buffer_store_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124{{$}}
-define amdgpu_kernel void @store_to_inttoptr() #0 {
+define amdgpu_kernel void @store_to_inttoptr() nounwind {
store volatile i32 0, ptr addrspace(5) inttoptr (i32 124 to ptr addrspace(5))
ret void
}
@@ -35,7 +35,7 @@ define amdgpu_kernel void @store_to_inttoptr() #0 {
; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offen glc{{$}}
-define amdgpu_kernel void @load_from_undef() #0 {
+define amdgpu_kernel void @load_from_undef() nounwind {
%ld = load volatile i32, ptr addrspace(5) undef
ret void
}
@@ -44,9 +44,7 @@ define amdgpu_kernel void @load_from_undef() #0 {
; OPT-DAG: s_mov_b64 s[[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]], s[2:3]
; OPT: buffer_load_dword v{{[0-9]+}}, off, s[[[RSRC_LO]]:[[RSRC_HI]]], 0 offset:124 glc{{$}}
-define amdgpu_kernel void @load_from_inttoptr() #0 {
+define amdgpu_kernel void @load_from_inttoptr() nounwind {
%ld = load volatile i32, ptr addrspace(5) inttoptr (i32 124 to ptr addrspace(5))
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 462ab38b4cd58d..ab33e5417214cc 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -18,7 +18,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1
; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1
-define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%0 = load i32, ptr addrspace(1) %in, align 4
@@ -48,7 +48,7 @@ entry:
; R600-NOT: MOVA_INT
%struct.point = type { i32, i32 }
-define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%a = alloca %struct.point, addrspace(5)
%b = alloca %struct.point, addrspace(5)
@@ -72,7 +72,7 @@ entry:
; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%prv_array_const = alloca [2 x i32], addrspace(5)
%prv_array = alloca [2 x i32], addrspace(5)
@@ -103,7 +103,7 @@ for.end:
; FUNC-LABEL: {{^}}short_array:
; R600-VECT: MOVA_INT
-define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [2 x i16], addrspace(5)
%1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1
@@ -119,7 +119,7 @@ entry:
; FUNC-LABEL: {{^}}char_array:
; R600-VECT: MOVA_INT
-define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [2 x i8], addrspace(5)
%1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1
@@ -139,7 +139,7 @@ entry:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [2 x i32], addrspace(5)
%1 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
@@ -156,7 +156,7 @@ entry:
; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
; FUNC-LABEL: {{^}}no_overlap:
-define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%0 = alloca [3 x i8], align 1, addrspace(5)
%1 = alloca [2 x i8], align 1, addrspace(5)
@@ -178,7 +178,7 @@ entry:
ret void
}
-define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i8]], addrspace(5)
%gep1 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -191,7 +191,7 @@ entry:
ret void
}
-define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i32]], addrspace(5)
%gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -203,7 +203,7 @@ entry:
ret void
}
-define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x i64]], addrspace(5)
%gep1 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -217,7 +217,7 @@ entry:
%struct.pair32 = type { i32, i32 }
-define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
%gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1
@@ -230,7 +230,7 @@ entry:
ret void
}
-define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [2 x %struct.pair32], addrspace(5)
%gep0 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -263,7 +263,7 @@ entry:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
-define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], addrspace(5)
%tmp0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
store i32 5, ptr addrspace(5) %tmp0
@@ -278,5 +278,3 @@ define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
; OPT: !0 = !{i32 0, i32 257}
; OPT: !1 = !{i32 0, i32 256}
-
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll
index 79bcaf83cec7c0..4498303591b962 100644
--- a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll
+++ b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll
@@ -6,7 +6,7 @@
; and also before the beginning of the epilogue instructions in a trivial function.
; Function Attrs: convergent noinline nounwind optnone mustprogress
-define hidden void @_Z9base_casev() #0 !dbg !6 {
+define hidden void @_Z9base_casev() nounwind !dbg !6 {
; CHECK-LABEL: _Z9base_casev:
; CHECK: .Lfunc_begin0:
; CHECK-NEXT: .file 0 "dir" "file.cpp"
@@ -31,8 +31,6 @@ entry:
ret void, !dbg !7
}
-attributes #0 = { nounwind }
-
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!4, !5}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll
index 8a467812ec4850..81ff514600d45d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll
@@ -7,7 +7,7 @@
; CHECK: [[ASC:%[a-z0-9]+]] = addrspacecast ptr addrspace(3) [[GEP]] to ptr
; CHECK: [[LOAD:%[a-z0-9]+]] = load <2 x i16>, ptr [[ASC]]
; CHECK: bitcast <2 x i16> [[LOAD]] to <2 x half>
-define amdgpu_kernel void @invalid_bitcast_addrspace() #0 {
+define amdgpu_kernel void @invalid_bitcast_addrspace() nounwind "amdgpu-flat-work-group-size"="1,256" {
entry:
%data = alloca [1 x i32], addrspace(5)
%tmp1 = addrspacecast ptr addrspace(5) %data to ptr
@@ -15,5 +15,3 @@ entry:
%tmp4 = bitcast <2 x i16> %tmp3 to <2 x half>
ret void
}
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll
index a590bbc8023e0d..fe247525791ed2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: @array_alloca(
; CHECK: %stack = alloca i32, i32 5, align 4, addrspace(5)
-define amdgpu_kernel void @array_alloca(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
+define amdgpu_kernel void @array_alloca(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind {
entry:
%stack = alloca i32, i32 5, align 4, addrspace(5)
%ld0 = load i32, ptr addrspace(1) %in, align 4
@@ -26,7 +26,7 @@ entry:
; CHECK-LABEL: @array_alloca_dynamic(
; CHECK: %stack = alloca i32, i32 %size, align 4, addrspace(5)
-define amdgpu_kernel void @array_alloca_dynamic(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %size) #0 {
+define amdgpu_kernel void @array_alloca_dynamic(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %size) nounwind {
entry:
%stack = alloca i32, i32 %size, align 4, addrspace(5)
%ld0 = load i32, ptr addrspace(1) %in, align 4
@@ -44,5 +44,3 @@ entry:
store i32 %ld3, ptr addrspace(1) %arrayidx13
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
index 5a7d811acb02ac..2536fe2d59f73c 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll
@@ -3,25 +3,23 @@
; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
; function is a constantexpr cast of a function.
-declare void @foo(ptr addrspace(5)) #0
-declare void @foo.varargs(...) #0
+declare void @foo(ptr addrspace(5)) nounwind
+declare void @foo.varargs(...) nounwind
; CHECK-LABEL: @crash_call_constexpr_cast(
; CHECK: alloca
; CHECK: call void
-define amdgpu_kernel void @crash_call_constexpr_cast() #0 {
+define amdgpu_kernel void @crash_call_constexpr_cast() nounwind {
%alloca = alloca i32, addrspace(5)
- call void @foo(ptr addrspace(5) %alloca) #0
+ call void @foo(ptr addrspace(5) %alloca) nounwind
ret void
}
; CHECK-LABEL: @crash_call_constexpr_cast_varargs(
; CHECK: alloca
; CHECK: call void
-define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 {
+define amdgpu_kernel void @crash_call_constexpr_cast_varargs() nounwind {
%alloca = alloca i32, addrspace(5)
- call void @foo.varargs(ptr addrspace(5) %alloca) #0
+ call void @foo.varargs(ptr addrspace(5) %alloca) nounwind
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
index a585901fc377cb..3a775287761757 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
@@ -1,25 +1,21 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
target datalayout = "A5"
-declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture) #0
-declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture) #0
-declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5)) #1
+declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
+declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture) argmemonly nounwind
+declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5)) nounwind readnone
; GCN-LABEL: {{^}}use_invariant_promotable_lds:
; GCN: buffer_load_dword
; GCN: ds_write_b32
-define amdgpu_kernel void @use_invariant_promotable_lds(ptr addrspace(1) %arg) #2 {
+define amdgpu_kernel void @use_invariant_promotable_lds(ptr addrspace(1) %arg) nounwind {
bb:
%tmp = alloca i32, align 4, addrspace(5)
%tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
%tmp3 = load i32, ptr addrspace(1) %tmp2
store i32 %tmp3, ptr addrspace(5) %tmp
- %tmp4 = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %tmp) #0
- call void @llvm.invariant.end.p5(ptr %tmp4, i64 4, ptr addrspace(5) %tmp) #0
- %tmp5 = call ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5) %tmp) #1
+ %tmp4 = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %tmp) argmemonly nounwind
+ call void @llvm.invariant.end.p5(ptr %tmp4, i64 4, ptr addrspace(5) %tmp) argmemonly nounwind
+ %tmp5 = call ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5) %tmp) nounwind readnone
ret void
}
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
index 2230e12a30f590..1657a5ad2a415c 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
@@ -2,14 +2,14 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
-declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #0
-declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #0
+declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
+declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
; OPT-LABEL: @use_lifetime_promotable_lds(
; OPT-NOT: alloca i32
; OPT-NOT: llvm.lifetime
; OPT: store i32 %tmp3, ptr addrspace(3)
-define amdgpu_kernel void @use_lifetime_promotable_lds(ptr addrspace(1) %arg) #2 {
+define amdgpu_kernel void @use_lifetime_promotable_lds(ptr addrspace(1) %arg) nounwind {
bb:
%tmp = alloca i32, align 4, addrspace(5)
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %tmp)
@@ -31,7 +31,3 @@ entry:
call void @llvm.lifetime.start.p5(i64 1, ptr addrspace(5) %alloca)
ret void
}
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
index aabd5df9568370..4665e22d8cf9d5 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -1,22 +1,22 @@
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck --enable-var-scope %s
-declare void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
-declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
-declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0
+declare void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
+declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
+declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
-declare void @llvm.memmove.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
-declare void @llvm.memmove.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
-declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0
+declare void @llvm.memmove.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
+declare void @llvm.memmove.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
+declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i1) #0
+declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i1) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3"
-declare i32 @llvm.objectsize.i32.p5(ptr addrspace(5), i1, i1, i1) #1
+declare i32 @llvm.objectsize.i32.p5(ptr addrspace(5), i1, i1, i1) nounwind readnone
; CHECK-LABEL: @promote_with_memcpy(
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 [[GEP]], ptr addrspace(1) align 4 %in, i32 68, i1 false)
; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 [[GEP]], i32 68, i1 false)
-define amdgpu_kernel void @promote_with_memcpy(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @promote_with_memcpy(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" {
%alloca = alloca [17 x i32], align 4, addrspace(5)
call void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) align 4 %alloca, ptr addrspace(1) align 4 %in, i32 68, i1 false)
call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 %out, ptr addrspace(5) align 4 %alloca, i32 68, i1 false)
@@ -27,7 +27,7 @@ define amdgpu_kernel void @promote_with_memcpy(ptr addrspace(1) %out, ptr addrsp
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memmove.p3.p1.i32(ptr addrspace(3) align 4 [[GEP]], ptr addrspace(1) align 4 %in, i32 68, i1 false)
; CHECK: call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 [[GEP]], i32 68, i1 false)
-define amdgpu_kernel void @promote_with_memmove(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @promote_with_memmove(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" {
%alloca = alloca [17 x i32], align 4, addrspace(5)
call void @llvm.memmove.p5.p1.i32(ptr addrspace(5) align 4 %alloca, ptr addrspace(1) align 4 %in, i32 68, i1 false)
call void @llvm.memmove.p1.p5.i32(ptr addrspace(1) align 4 %out, ptr addrspace(5) align 4 %alloca, i32 68, i1 false)
@@ -37,7 +37,7 @@ define amdgpu_kernel void @promote_with_memmove(ptr addrspace(1) %out, ptr addrs
; CHECK-LABEL: @promote_with_memset(
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3.i32(ptr addrspace(3) align 4 [[GEP]], i8 7, i32 68, i1 false)
-define amdgpu_kernel void @promote_with_memset(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @promote_with_memset(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" {
%alloca = alloca [17 x i32], align 4, addrspace(5)
call void @llvm.memset.p5.i32(ptr addrspace(5) align 4 %alloca, i8 7, i32 68, i1 false)
ret void
@@ -46,7 +46,7 @@ define amdgpu_kernel void @promote_with_memset(ptr addrspace(1) %out, ptr addrsp
; CHECK-LABEL: @promote_with_objectsize(
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3(ptr addrspace(3) [[PTR]], i1 false, i1 false, i1 false)
-define amdgpu_kernel void @promote_with_objectsize(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @promote_with_objectsize(ptr addrspace(1) %out) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" {
%alloca = alloca [17 x i32], align 4, addrspace(5)
%size = call i32 @llvm.objectsize.i32.p5(ptr addrspace(5) %alloca, i1 false, i1 false, i1 false)
store i32 %size, ptr addrspace(1) %out
@@ -55,7 +55,7 @@ define amdgpu_kernel void @promote_with_objectsize(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: @promote_with_objectsize_8(
; CHECK: store i32 32, ptr addrspace(1) %out, align 4
-define amdgpu_kernel void @promote_with_objectsize_8(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @promote_with_objectsize_8(ptr addrspace(1) %out) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" {
%alloca = alloca [8 x i32], align 4, addrspace(5)
%size = call i32 @llvm.objectsize.i32.p5(ptr addrspace(5) %alloca, i1 false, i1 false, i1 false)
store i32 %size, ptr addrspace(1) %out
@@ -82,6 +82,3 @@ entry:
call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) align 8 dereferenceable(16) %arrayidx1, ptr addrspace(5) align 8 dereferenceable(16) %arrayidx2, i64 16, i1 false)
ret void
}
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
index 28b923243b6db2..21fe931cfee997 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
@@ -5,7 +5,7 @@
; NOOPTS: .amdhsa_group_segment_fixed_size 0
; NOOPTS-NOT: ds_write
; OPTS: ds_write
-define amdgpu_kernel void @promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) nounwind "amdgpu-flat-work-group-size"="64,64" {
entry:
%alloca = alloca [2 x [2 x i32]], addrspace(5)
%gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -20,7 +20,7 @@ entry:
; ALL-LABEL: {{^}}optnone_promote_alloca_i32_array_array:
; ALL: .amdhsa_group_segment_fixed_size 0
; ALL-NOT: ds_write
-define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) #1 {
+define amdgpu_kernel void @optnone_promote_alloca_i32_array_array(ptr addrspace(1) %out, i32 %index) nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" {
entry:
%alloca = alloca [2 x [2 x i32]], addrspace(5)
%gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
@@ -32,8 +32,5 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
-attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
index 20a8cfc2a27996..c1713a2b561eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
@@ -32,7 +32,7 @@
; GCN-LABEL: {{^}}promote_alloca_size_order_0:
; GCN: .amdhsa_group_segment_fixed_size 1060
-define amdgpu_kernel void @promote_alloca_size_order_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_size_order_0(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, ptr addrspace(1) %in, align 4
@@ -63,7 +63,7 @@ entry:
; GCN-LABEL: {{^}}promote_alloca_size_order_1:
; GCN: .amdhsa_group_segment_fixed_size 1072
-define amdgpu_kernel void @promote_alloca_size_order_1(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_size_order_1(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, ptr addrspace(1) %in, align 4
@@ -100,7 +100,7 @@ entry:
; GCN-LABEL: {{^}}promote_alloca_align_pad_guess_over_limit:
; GCN: .amdhsa_group_segment_fixed_size 1060
-define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) #0 {
+define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in, i32 %idx) nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%tmp0 = load i32, ptr addrspace(1) %in, align 4
@@ -126,7 +126,5 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
index ed0fe0d4ef4d31..6a74765123e2c2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-stored-pointer-value.ll
@@ -5,7 +5,7 @@
; GCN-LABEL: {{^}}stored_lds_pointer_value:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value(ptr addrspace(1) %ptr) nounwind {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
store ptr addrspace(5) %tmp, ptr addrspace(1) %ptr
@@ -14,7 +14,7 @@ define amdgpu_kernel void @stored_lds_pointer_value(ptr addrspace(1) %ptr) #0 {
; GCN-LABEL: {{^}}stored_lds_pointer_value_offset:
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_offset(ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_offset(ptr addrspace(1) %ptr) nounwind {
%tmp0 = alloca float, addrspace(5)
%tmp1 = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp0
@@ -29,7 +29,7 @@ define amdgpu_kernel void @stored_lds_pointer_value_offset(ptr addrspace(1) %ptr
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
; GCN: buffer_store_dword v
; GCN: buffer_store_dword v
-define amdgpu_kernel void @stored_lds_pointer_value_gep(ptr addrspace(1) %ptr, i32 %idx) #0 {
+define amdgpu_kernel void @stored_lds_pointer_value_gep(ptr addrspace(1) %ptr, i32 %idx) nounwind {
bb:
%tmp = alloca float, i32 16, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
@@ -63,11 +63,9 @@ entry:
; GCN-LABEL: {{^}}stored_fi_to_self:
; GCN-NOT: ds_
-define amdgpu_kernel void @stored_fi_to_self() #0 {
+define amdgpu_kernel void @stored_fi_to_self() nounwind {
%tmp = alloca ptr addrspace(5), addrspace(5)
store volatile ptr addrspace(5) inttoptr (i32 1234 to ptr addrspace(5)), ptr addrspace(5) %tmp
store volatile ptr addrspace(5) %tmp, ptr addrspace(5) %tmp
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
index 83acd5eddc816b..9e136fa32b86cb 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-constantexpr-use.ll
@@ -18,7 +18,7 @@ target datalayout = "A5"
; ASM-LABEL: constant_expression_uses_all_lds:
; ASM: .amdhsa_group_segment_fixed_size 65536
-define amdgpu_kernel void @constant_expression_uses_all_lds(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_all_lds(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -44,7 +44,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_some_lds:
; ASM: .amdhsa_group_segment_fixed_size 4224{{$}}
-define amdgpu_kernel void @constant_expression_uses_some_lds(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_some_lds(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -69,7 +69,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_some_dynamic_lds:
; ASM: .amdhsa_group_segment_fixed_size 0{{$}}
-define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -93,7 +93,7 @@ declare void @callee(ptr)
; ASM-LABEL: {{^}}constant_expression_uses_all_lds_multi_level:
; ASM: .amdhsa_group_segment_fixed_size 65536{{$}}
-define amdgpu_kernel void @constant_expression_uses_all_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_all_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -116,7 +116,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_some_lds_multi_level:
; ASM: .amdhsa_group_segment_fixed_size 4224{{$}}
-define amdgpu_kernel void @constant_expression_uses_some_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_some_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -138,7 +138,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_some_dynamic_lds_multi_level:
; ASM: .amdhsa_group_segment_fixed_size 0{{$}}
-define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_some_dynamic_lds_multi_level(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -161,7 +161,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_some_lds_global_initializer:
; ASM: .amdhsa_group_segment_fixed_size 4096{{$}}
-define amdgpu_kernel void @constant_expression_uses_some_lds_global_initializer(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_some_lds_global_initializer(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -187,7 +187,7 @@ entry:
; ASM-LABEL: {{^}}constant_expression_uses_all_lds_global_initializer:
; ASM: .group_segment_fixed_size: 65536
-define amdgpu_kernel void @constant_expression_uses_all_lds_global_initializer(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @constant_expression_uses_all_lds_global_initializer(ptr addrspace(1) nocapture %out, i32 %idx) "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" {
entry:
%stack = alloca [4 x i32], align 4, addrspace(5)
%gep1 = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 1
@@ -203,5 +203,3 @@ entry:
store volatile i32 ptrtoint (ptr addrspace(1) @initializer_user_all to i32), ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { "amdgpu-waves-per-eu"="1,5" "amdgpu-flat-work-group-size"="256,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
index efc11bf1a606da..f08874febee5c0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@@ -5,7 +5,7 @@
; This normally would be fixed by instcombine to be compare to the GEP
; indices
-define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
@@ -47,7 +47,7 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr add
ret void
}
-define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
@@ -86,7 +86,7 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
@@ -125,7 +125,7 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [16 x i32], align 4, addrspace(5)
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[A:%.*]]
@@ -153,6 +153,4 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(ptr addrspace(1)
ret void
}
-declare ptr addrspace(5) @get_unknown_pointer() #0
-
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+declare ptr addrspace(5) @get_unknown_pointer() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256"
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
index 8f34612735d917..1cbe8b6640492c 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -13,7 +13,7 @@
; CHECK: endif:
; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, ptr addrspace(3) %phi.ptr, align 4
-define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b, i1 %c0) #0 {
+define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b, i1 %c0) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 %c0, label %if, label %else
@@ -34,7 +34,7 @@ endif:
; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ null, %entry ]
-define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b, i1 %c0) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b, i1 %c0) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 %c0, label %if, label %endif
@@ -51,7 +51,7 @@ endif:
; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
; CHECK: %phi.ptr = phi ptr addrspace(3) [ null, %entry ], [ %arrayidx0, %if ]
-define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b, i1 %c0) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b, i1 %c0) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 %c0, label %if, label %endif
@@ -73,7 +73,7 @@ endif:
; CHECK: br label %exit
; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %entry ]
; CHECK: store i32 0, ptr addrspace(3) %phi.ptr, align 4
-define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
+define amdgpu_kernel void @one_phi_value(i32 %a) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
@@ -97,7 +97,7 @@ exit:
; CHECK: endif:
; CHECK: %phi.ptr = phi ptr addrspace(5) [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, ptr addrspace(5) %phi.ptr, align 4
-define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b, i1 %c0) #0 {
+define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b, i1 %c0) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
br i1 %c0, label %if, label %else
@@ -134,7 +134,7 @@ endif:
; CHECK-LABEL: @ptr_induction_var_same_alloca(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: phi ptr addrspace(5) [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
-define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
+define amdgpu_kernel void @ptr_induction_var_same_alloca() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 2
@@ -172,7 +172,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: %p.08 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
; CHECK: %cmp = icmp eq ptr addrspace(5) %incdec.ptr, %call
-define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
+define amdgpu_kernel void @ptr_induction_var_alloca_unknown() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [64 x i32], align 4, addrspace(5)
%arrayidx = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 2
@@ -199,6 +199,4 @@ for.body: ; preds = %for.body, %for.body
br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
}
-declare ptr addrspace(5) @get_unknown_pointer() #0
-
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+declare ptr addrspace(5) @get_unknown_pointer() nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256"
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
index 4a15dad069c63c..084f21f2213723 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca
-define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca i32, align 4, addrspace(5)
%select = select i1 undef, ptr addrspace(5) undef, ptr addrspace(5) %alloca
store i32 0, ptr addrspace(5) %select, align 4
@@ -18,7 +18,7 @@ define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand()
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, ptr addrspace(3) %ptr0, ptr addrspace(3) %ptr1
; CHECK: store i32 0, ptr addrspace(3) %select, align 4
-define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
@@ -35,7 +35,7 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a
; CHECK: %ptr0 = getelementptr inbounds i32, ptr addrspace(5) %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %alloca1, i32 %b
; CHECK: %select = select i1 undef, ptr addrspace(5) %ptr0, ptr addrspace(5) %ptr1
-define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca0 = alloca i32, i32 16, align 4, addrspace(5)
%alloca1 = alloca i32, i32 16, align 4, addrspace(5)
%ptr0 = getelementptr inbounds i32, ptr addrspace(5) %alloca0, i32 %a
@@ -52,7 +52,7 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b)
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(3) [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, ptr addrspace(3) %ptr0, ptr addrspace(3) %ptr1
; CHECK: store i32 0, ptr addrspace(3) %select, align 4
-define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
+define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1
%ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 3
@@ -66,7 +66,7 @@ define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointe
; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: alloca
-define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
%ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
@@ -77,7 +77,7 @@ define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %
ret void
}
-define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c, i1 %c0) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c, i1 %c0) norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
@@ -101,7 +101,7 @@ bb2:
; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, ptr addrspace(3) %{{[0-9]+}}, ptr addrspace(3) null
-define amdgpu_kernel void @select_null_rhs(ptr addrspace(1) nocapture %arg, i32 %arg1) #1 {
+define amdgpu_kernel void @select_null_rhs(ptr addrspace(1) nocapture %arg, i32 %arg1) norecurse nounwind {
bb:
%tmp = alloca double, align 8, addrspace(5)
store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
@@ -116,7 +116,7 @@ bb:
; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, ptr addrspace(3) null, ptr addrspace(3) %{{[0-9]+}}
-define amdgpu_kernel void @select_null_lhs(ptr addrspace(1) nocapture %arg, i32 %arg1) #1 {
+define amdgpu_kernel void @select_null_lhs(ptr addrspace(1) nocapture %arg, i32 %arg1) norecurse nounwind {
bb:
%tmp = alloca double, align 8, addrspace(5)
store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
@@ -127,6 +127,3 @@ bb:
store double %tmp4, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { norecurse nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
index 1fe1904592a7df..76ae81f3afd0c0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
@@ -3,12 +3,12 @@
; This is just an arbitrary intrinisic that shouldn't be
; handled to ensure it doesn't crash.
-declare void @llvm.stackrestore.p5(ptr addrspace(5)) #2
+declare void @llvm.stackrestore.p5(ptr addrspace(5)) nounwind
; CHECK-LABEL: @try_promote_unhandled_intrinsic(
; CHECK: alloca
; CHECK: call void @llvm.stackrestore.p5(ptr addrspace(5) %tmp)
-define amdgpu_kernel void @try_promote_unhandled_intrinsic(ptr addrspace(1) %arg) #2 {
+define amdgpu_kernel void @try_promote_unhandled_intrinsic(ptr addrspace(1) %arg) nounwind {
bb:
%tmp = alloca i32, addrspace(5)
%tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
@@ -17,7 +17,3 @@ bb:
call void @llvm.stackrestore.p5(ptr addrspace(5) %tmp)
ret void
}
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll
index 1cddc528b54e1f..803ae21bc06aea 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-volatile.ll
@@ -41,7 +41,7 @@ entry:
; CHECK: alloca double
; CHECK: load double
; CHECK: load volatile double
-define amdgpu_kernel void @volatile_and_non_volatile_load(ptr addrspace(1) nocapture %arg, i32 %arg1) #0 {
+define amdgpu_kernel void @volatile_and_non_volatile_load(ptr addrspace(1) nocapture %arg, i32 %arg1) nounwind {
bb:
%tmp = alloca double, align 8, addrspace(5)
store double 0.000000e+00, ptr addrspace(5) %tmp, align 8
@@ -52,5 +52,3 @@ bb:
store double %tmp4, ptr addrspace(1) %arg
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index b6afb7cf8c9a11..77bfd76937f6ca 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
-declare i64 @_Z13get_global_idj(i32) #0
+declare i64 @_Z13get_global_idj(i32) nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false"
define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX8-LABEL: clmem_read_simplified:
@@ -2561,5 +2561,3 @@ entry:
store i64 %add, ptr addrspace(1) %add.ptr12, align 8
ret void
}
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll b/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll
index dabb9d43bf3d68..e129ef309d5f85 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-vect3-load.ll
@@ -6,7 +6,7 @@
; This type promotion on smaller aligned loads can cause a page fault error
; while accessing one extra dword beyond the buffer.
-define protected amdgpu_kernel void @load_v3i32_align4(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3i32_align4(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3i32_align4:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -17,7 +17,7 @@ define protected amdgpu_kernel void @load_v3i32_align4(ptr addrspace(1) %arg) #0
ret void
}
-define protected amdgpu_kernel void @load_v3i32_align8(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3i32_align8(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3i32_align8:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -27,7 +27,7 @@ define protected amdgpu_kernel void @load_v3i32_align8(ptr addrspace(1) %arg) #0
ret void
}
-define protected amdgpu_kernel void @load_v3i32_align16(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3i32_align16(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3i32_align16:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -37,7 +37,7 @@ define protected amdgpu_kernel void @load_v3i32_align16(ptr addrspace(1) %arg) #
ret void
}
-define protected amdgpu_kernel void @load_v3f32_align4(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3f32_align4(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3f32_align4:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -48,7 +48,7 @@ define protected amdgpu_kernel void @load_v3f32_align4(ptr addrspace(1) %arg) #0
ret void
}
-define protected amdgpu_kernel void @load_v3f32_align8(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3f32_align8(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3f32_align8:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -58,7 +58,7 @@ define protected amdgpu_kernel void @load_v3f32_align8(ptr addrspace(1) %arg) #0
ret void
}
-define protected amdgpu_kernel void @load_v3f32_align16(ptr addrspace(1) %arg) #0 {
+define protected amdgpu_kernel void @load_v3f32_align16(ptr addrspace(1) %arg) nounwind noinline {
; GCN-LABEL: load_v3f32_align16:
; GCN: ; %bb.0:
; GCN: s_waitcnt lgkmcnt(0)
@@ -67,5 +67,3 @@ define protected amdgpu_kernel void @load_v3f32_align16(ptr addrspace(1) %arg) #
store <3 x float> %vec, ptr addrspace(1) undef, align 16
ret void
}
-
-attributes #0 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-bitcast-function.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-bitcast-function.ll
index 25a2924bef541a..32ee4231f364fc 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-bitcast-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-bitcast-function.ll
@@ -7,18 +7,15 @@
; GCN: foo1 at gotpcrel32@lo+4
; GCN: foo1 at gotpcrel32@hi+12
-define void @foo1(i32 %x) #1 {
+define void @foo1(i32 %x) noinline nounwind "target-features"="+wavefrontsize64" {
entry:
%cc = icmp eq i32 %x, 0
store volatile i1 %cc, ptr undef
ret void
}
-define amdgpu_kernel void @kernel1(float %x) #0 {
+define amdgpu_kernel void @kernel1(float %x) nounwind "target-features"="+wavefrontsize32" {
entry:
call void @foo1(float %x)
ret void
}
-
-attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index d070dc3b770f81..68da5008c89ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -13,7 +13,7 @@ define internal void @default_to_1_256() {
ret void
}
-define amdgpu_kernel void @kernel_1_256() #0 {
+define amdgpu_kernel void @kernel_1_256() "amdgpu-flat-work-group-size"="1,256" {
; CHECK-LABEL: define {{[^@]+}}@kernel_1_256
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: call void @default_to_1_256()
@@ -32,7 +32,7 @@ define internal void @default_to_64_128() {
ret void
}
-define amdgpu_kernel void @kernel_64_128() #1 {
+define amdgpu_kernel void @kernel_64_128() "amdgpu-flat-work-group-size"="64,128" {
; CHECK-LABEL: define {{[^@]+}}@kernel_64_128
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @default_to_64_128()
@@ -59,7 +59,7 @@ define internal void @default_to_128_512() {
; This already has a strict bounds, but called from kernels with wider
; bounds, and should not be changed.
-define internal void @flat_group_64_64() #2 {
+define internal void @flat_group_64_64() "amdgpu-flat-work-group-size"="64,64" {
; CHECK-LABEL: define {{[^@]+}}@flat_group_64_64
; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -68,7 +68,7 @@ define internal void @flat_group_64_64() #2 {
}
; 128,256 -> 128,128
-define internal void @flat_group_128_256() #3 {
+define internal void @flat_group_128_256() "amdgpu-flat-work-group-size"="128,256" {
; CHECK-LABEL: define {{[^@]+}}@flat_group_128_256
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -76,7 +76,7 @@ define internal void @flat_group_128_256() #3 {
ret void
}
-define internal void @flat_group_512_1024() #4 {
+define internal void @flat_group_512_1024() "amdgpu-flat-work-group-size"="512,1024" {
; CHECK-LABEL: define {{[^@]+}}@flat_group_512_1024
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -84,7 +84,7 @@ define internal void @flat_group_512_1024() #4 {
ret void
}
-define amdgpu_kernel void @kernel_128_512() #5 {
+define amdgpu_kernel void @kernel_128_512() "amdgpu-flat-work-group-size"="128,512" {
; CHECK-LABEL: define {{[^@]+}}@kernel_128_512
; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: call void @default_to_128_512()
@@ -96,7 +96,7 @@ define amdgpu_kernel void @kernel_128_512() #5 {
ret void
}
-define amdgpu_kernel void @kernel_512_512() #6 {
+define amdgpu_kernel void @kernel_512_512() "amdgpu-flat-work-group-size"="512,512" {
; CHECK-LABEL: define {{[^@]+}}@kernel_512_512
; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: call void @default_to_128_512()
@@ -119,7 +119,7 @@ define internal void @default_to_64_256() {
; The kernel's lower bound is higher than the callee's lower bound, so
; this should probably be illegal.
-define amdgpu_kernel void @kernel_128_256() #3 {
+define amdgpu_kernel void @kernel_128_256() "amdgpu-flat-work-group-size"="128,256" {
; CHECK-LABEL: define {{[^@]+}}@kernel_128_256
; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: call void @default_to_64_256()
@@ -130,7 +130,7 @@ define amdgpu_kernel void @kernel_128_256() #3 {
}
; 64,128 -> 64,128
-define internal void @merge_cycle_0() #1 {
+define internal void @merge_cycle_0() "amdgpu-flat-work-group-size"="64,128" {
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @merge_cycle_1()
@@ -141,7 +141,7 @@ define internal void @merge_cycle_0() #1 {
}
; 128,256 -> 128,128
-define internal void @merge_cycle_1() #3 {
+define internal void @merge_cycle_1() "amdgpu-flat-work-group-size"="128,256" {
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @merge_cycle_0()
@@ -151,7 +151,7 @@ define internal void @merge_cycle_1() #3 {
ret void
}
-define amdgpu_kernel void @kernel_64_256() #7 {
+define amdgpu_kernel void @kernel_64_256() "amdgpu-flat-work-group-size"="64,256" {
; CHECK-LABEL: define {{[^@]+}}@kernel_64_256
; CHECK-SAME: () #[[ATTR6]] {
; CHECK-NEXT: call void @merge_cycle_0()
@@ -193,15 +193,6 @@ define internal i32 @bitcasted_function() {
;
ret i32 0
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { "amdgpu-flat-work-group-size"="64,128" }
-attributes #2 = { "amdgpu-flat-work-group-size"="64,64" }
-attributes #3 = { "amdgpu-flat-work-group-size"="128,256" }
-attributes #4 = { "amdgpu-flat-work-group-size"="512,1024" }
-attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
-attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
-attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index f62f1d57aec8e4..e446192e51452e 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -12,7 +12,7 @@ define internal void @default_to_1_8_a() {
ret void
}
-define amdgpu_kernel void @kernel_1_8() #0 {
+define amdgpu_kernel void @kernel_1_8() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1_8
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: call void @default_to_1_8_a()
@@ -31,7 +31,7 @@ define internal void @default_to_1_2() {
ret void
}
-define amdgpu_kernel void @kernel_1_2() #1 {
+define amdgpu_kernel void @kernel_1_2() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @default_to_1_2()
@@ -56,7 +56,7 @@ define internal void @default_to_1_4() {
ret void
}
-define amdgpu_kernel void @kernel_1_4() #2 {
+define amdgpu_kernel void @kernel_1_4() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1_4
; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: call void @default_to_1_4()
@@ -77,7 +77,7 @@ define internal void @default_to_2_9() {
; This already has strict bounds, but called from kernels with wider
; bounds, and should not be changed.
-define internal void @flat_group_1_1() #3 {
+define internal void @flat_group_1_1() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" {
; CHECK-LABEL: define internal void @flat_group_1_1
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -86,7 +86,7 @@ define internal void @flat_group_1_1() #3 {
}
; 2,8 -> 2,2
-define internal void @flat_group_2_8() #4 {
+define internal void @flat_group_2_8() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" {
; CHECK-LABEL: define internal void @flat_group_2_8
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -95,7 +95,7 @@ define internal void @flat_group_2_8() #4 {
}
; 9,10 -> 9,9
-define internal void @flat_group_9_10() #5 {
+define internal void @flat_group_9_10() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" {
; CHECK-LABEL: define internal void @flat_group_9_10
; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -103,7 +103,7 @@ define internal void @flat_group_9_10() #5 {
ret void
}
-define amdgpu_kernel void @kernel_2_9() #6 {
+define amdgpu_kernel void @kernel_2_9() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2_9
; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: call void @default_to_2_9()
@@ -115,7 +115,7 @@ define amdgpu_kernel void @kernel_2_9() #6 {
ret void
}
-define amdgpu_kernel void @kernel_9_9() #7 {
+define amdgpu_kernel void @kernel_9_9() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9
; CHECK-SAME: () #[[ATTR6]] {
; CHECK-NEXT: call void @default_to_2_9()
@@ -138,7 +138,7 @@ define internal void @default_to_1_8_b() {
; The kernel's lower bound is higher than the callee's lower bound, so
; this should probably be illegal.
-define amdgpu_kernel void @kernel_2_8() #4 {
+define amdgpu_kernel void @kernel_2_8() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8
; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: call void @default_to_1_8_a()
@@ -151,7 +151,7 @@ define amdgpu_kernel void @kernel_2_8() #4 {
}
; 1,2 -> 2,2
-define internal void @merge_cycle_0() #1 {
+define internal void @merge_cycle_0() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
; CHECK-LABEL: define internal void @merge_cycle_0
; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: call void @merge_cycle_1()
@@ -163,7 +163,7 @@ define internal void @merge_cycle_0() #1 {
; Called from 1,2 + 3,8
; 2,8 -> 2,8
-define internal void @merge_cycle_1() #4 {
+define internal void @merge_cycle_1() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" {
; CHECK-LABEL: define internal void @merge_cycle_1
; CHECK-SAME: () #[[ATTR7]] {
; CHECK-NEXT: call void @merge_cycle_0()
@@ -173,7 +173,7 @@ define internal void @merge_cycle_1() #4 {
ret void
}
-define amdgpu_kernel void @kernel_3_8() #8 {
+define amdgpu_kernel void @kernel_3_8() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3_8
; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
; CHECK-NEXT: call void @merge_cycle_0()
@@ -233,7 +233,7 @@ define internal void @called_from_invalid_bounds_1() {
}
; Invalid range for amdgpu-waves-per-eu
-define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
+define amdgpu_kernel void @kernel_invalid_bounds_0_8() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8
; CHECK-SAME: () #[[ATTR11:[0-9]+]] {
; CHECK-NEXT: call void @called_from_invalid_bounds_0()
@@ -244,7 +244,7 @@ define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
}
; Invalid range for amdgpu-waves-per-eu
-define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 {
+define amdgpu_kernel void @kernel_invalid_bounds_1_123() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123
; CHECK-SAME: () #[[ATTR12:[0-9]+]] {
; CHECK-NEXT: call void @called_from_invalid_bounds_1()
@@ -258,7 +258,7 @@ define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 {
; The 512 maximum workgroup size implies a minimum occupancy of 2. The
; implied minimum waves-per-eu should not be 3
; -> 2,10
-define void @larger_group_size_implies_lower_minimum() #11 {
+define void @larger_group_size_implies_lower_minimum() "amdgpu-flat-work-group-size"="1,512" {
; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum
; CHECK-SAME: () #[[ATTR13:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -266,7 +266,7 @@ define void @larger_group_size_implies_lower_minimum() #11 {
ret void
}
-define amdgpu_kernel void @kernel_3_6() #12 {
+define amdgpu_kernel void @kernel_3_6() "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6
; CHECK-SAME: () #[[ATTR14:[0-9]+]] {
; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum()
@@ -277,7 +277,7 @@ define amdgpu_kernel void @kernel_3_6() #12 {
}
; 3,6 -> 6,9
-define internal void @refine_upper_func_3_6() #13 {
+define internal void @refine_upper_func_3_6() "amdgpu-waves-per-eu"="3,6" {
; CHECK-LABEL: define internal void @refine_upper_func_3_6
; CHECK-SAME: () #[[ATTR15:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -286,7 +286,7 @@ define internal void @refine_upper_func_3_6() #13 {
}
; 4,8 -> 6,8
-define internal void @refine_lower_func_4_8() #14 {
+define internal void @refine_lower_func_4_8() "amdgpu-waves-per-eu"="4,8" {
; CHECK-LABEL: define internal void @refine_lower_func_4_8
; CHECK-SAME: () #[[ATTR16:[0-9]+]] {
; CHECK-NEXT: call void @refine_upper_func_3_6()
@@ -296,7 +296,7 @@ define internal void @refine_lower_func_4_8() #14 {
ret void
}
-define amdgpu_kernel void @kernel_foo_6_8() #15 {
+define amdgpu_kernel void @kernel_foo_6_8() "amdgpu-waves-per-eu"="6,8" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8
; CHECK-SAME: () #[[ATTR16]] {
; CHECK-NEXT: call void @refine_upper_func_3_6()
@@ -311,7 +311,7 @@ define amdgpu_kernel void @kernel_foo_6_8() #15 {
}
; 5,5 -> 5,5
-define internal void @func_5_5() #16 {
+define internal void @func_5_5() "amdgpu-waves-per-eu"="5,5" {
; CHECK-LABEL: define internal void @func_5_5
; CHECK-SAME: () #[[ATTR17:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -320,7 +320,7 @@ define internal void @func_5_5() #16 {
}
; 5,8 -> 8,8
-define internal void @func_5_8() #17 {
+define internal void @func_5_8() "amdgpu-waves-per-eu"="5,8" {
; CHECK-LABEL: define internal void @func_5_8
; CHECK-SAME: () #[[ATTR18:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -329,7 +329,7 @@ define internal void @func_5_8() #17 {
}
; 9,10 -> 9,10
-define internal void @func_9_10_a() #18 {
+define internal void @func_9_10_a() "amdgpu-waves-per-eu"="9,10" {
; CHECK-LABEL: define internal void @func_9_10_a
; CHECK-SAME: () #[[ATTR19:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -338,7 +338,7 @@ define internal void @func_9_10_a() #18 {
}
; 9,10 -> 9,9
-define internal void @func_9_10_b() #18 {
+define internal void @func_9_10_b() "amdgpu-waves-per-eu"="9,10" {
; CHECK-LABEL: define internal void @func_9_10_b
; CHECK-SAME: () #[[ATTR20:[0-9]+]] {
; CHECK-NEXT: ret void
@@ -346,7 +346,7 @@ define internal void @func_9_10_b() #18 {
ret void
}
-define amdgpu_kernel void @kernel_bar_8_9() #19 {
+define amdgpu_kernel void @kernel_bar_8_9() "amdgpu-waves-per-eu"="8,9" {
; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9
; CHECK-SAME: () #[[ATTR21:[0-9]+]] {
; CHECK-NEXT: call void @refine_upper_func_3_6()
@@ -377,27 +377,6 @@ define void @externally_visible() {
; Use a 1 wave workgroup so there is no interaction by the workgroup
; size on the implied waves per EU.
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" }
-attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" }
-attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" }
-attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" }
-attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" }
-attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" }
-attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" }
-attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" }
-attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" }
-attributes #10 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" }
-attributes #11 = { "amdgpu-flat-work-group-size"="1,512" }
-attributes #12 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" }
-attributes #13 = { "amdgpu-waves-per-eu"="3,6" }
-attributes #14 = { "amdgpu-waves-per-eu"="4,8" }
-attributes #15 = { "amdgpu-waves-per-eu"="6,8" }
-attributes #16 = { "amdgpu-waves-per-eu"="5,5" }
-attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
-attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
-attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll b/llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll
index f3c3acc317ce15..48b3a1dd6fb625 100644
--- a/llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll
@@ -3,7 +3,7 @@
; ;CHECK-LABEL: {{^}}_amdgpu_ps_1_arg:
; ;CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%ret1 = insertelement <4 x float> undef, float %i1, i32 0
@@ -13,7 +13,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32
; CHECK-LABEL: {{^}}_amdgpu_ps_3_arg:
; CHECK: NumVgprs: 6
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -27,7 +27,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_gap:
; CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i3 = extractelement <2 x float> %arg5, i32 1
@@ -41,7 +41,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg,
; adjacent to arg1 (the only 2 used arguments)
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_no_pack:
; CHECK: NumVgprs: 6
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #1 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "InitialPSInputAddr"="2" "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i3 = extractelement <2 x float> %arg5, i32 1
@@ -53,7 +53,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg:
; CHECK: NumVgprs: 24
-define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -99,7 +99,7 @@ define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float>
; Extra arguments have to be allocated even if they're unused
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra_unused:
; CHECK: NumVgprs: 26
-define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -145,7 +145,7 @@ define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float>
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra:
; CHECK: NumVgprs: 26
; CHECK: NumVGPRsForWavesPerEU: 26
-define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -194,7 +194,7 @@ define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float>
; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused:
; CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
ret { <4 x float> } undef
}
@@ -203,14 +203,14 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg
; Additionally set the PSInputAddr to 0 via the metadata
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_ia0:
; CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_ia0(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #3 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_ia0(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr nounwind "InitialPSInputAddr"="0" "target-features"=",+wavefrontsize64,+cumode" {
.entry:
ret { <4 x float> } undef
}
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used:
; CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
%ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1
@@ -222,7 +222,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_used:
; CHECK: NumVgprs: 5
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%ret4.1 = insertelement <4 x float> undef, float %arg14, i32 0
%ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg1, i32 1
@@ -235,7 +235,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i3
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_unused:
; CHECK: NumVgprs: 7
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%ret4.1 = insertelement <4 x float> undef, float %arg12, i32 0
%ret4.2 = insertelement <4 x float> %ret4.1, float %arg13, i32 1
@@ -249,7 +249,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(
; Extra unused inputs are always added to the allocation
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused:
; CHECK: NumVgprs: 4
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
ret { <4 x float> } undef
@@ -257,7 +257,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used_no_packing:
; CHECK: NumVgprs: 26
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
%ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1
@@ -269,7 +269,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused_no_packing:
; CHECK: NumVgprs: 26
-define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
+define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" {
.entry:
ret { <4 x float> } undef
}
@@ -277,7 +277,7 @@ define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_n
; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_arg_extra:
; CHECK: NumVgprs: 24
; CHECK: NumVGPRsForWavesPerEU: 24
-define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
+define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -321,7 +321,7 @@ define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float>
;CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_no_packing_arg_extra:
;CHECK: NumVgprs: 26
;CHECK: NumVGPRsForWavesPerEU: 26
-define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_no_packing_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
+define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_no_packing_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" {
.entry:
%i1 = extractelement <2 x float> %arg3, i32 1
%i2 = extractelement <2 x float> %arg4, i32 0
@@ -356,8 +356,3 @@ define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float>
ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}
-
-attributes #0 = { nounwind "target-features"=",+wavefrontsize64,+cumode" }
-attributes #1 = { nounwind "InitialPSInputAddr"="2" "target-features"=",+wavefrontsize64,+cumode" }
-attributes #2 = { nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" }
-attributes #3 = { nounwind "InitialPSInputAddr"="0" "target-features"=",+wavefrontsize64,+cumode" }
diff --git a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
index e29f09dcac0248..4f3d039c022394 100644
--- a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
@@ -6,7 +6,7 @@
; The 64-bit pointer argument %arg1 will be split into two registers
; and for its llvm.dbg.declare, DAG should emit two DBG_VALUE instructions
; with the fragment expressions.
-define hidden void @ptr_arg_split_subregs(ptr %arg1) #0 !dbg !9 {
+define hidden void @ptr_arg_split_subregs(ptr %arg1) nounwind !dbg !9 {
; CHECK-LABEL: ptr_arg_split_subregs:
; CHECK: .Lfunc_begin0:
; CHECK-NEXT: .file 1 "temp" "example.cpp"
@@ -37,7 +37,7 @@ define hidden void @ptr_arg_split_subregs(ptr %arg1) #0 !dbg !9 {
; are totally misleading. The former represent part of the incoming argument in register
; while the latter was emitted for the parameter copy to a virtual register inserted
; at the function entry by DAGBuilder.
-define hidden void @ptr_arg_split_reg_mem(<30 x i32>, ptr %arg2) #0 !dbg !25 {
+define hidden void @ptr_arg_split_reg_mem(<30 x i32>, ptr %arg2) nounwind !dbg !25 {
; CHECK-LABEL: ptr_arg_split_reg_mem:
; CHECK: .Lfunc_begin1:
; CHECK-NEXT: .loc 1 10 0 ; example.cpp:10:0
@@ -63,7 +63,7 @@ define hidden void @ptr_arg_split_reg_mem(<30 x i32>, ptr %arg2) #0 !dbg !25 {
; FIXME: The 64-bit pointer argument %arg3 will be entirely in the stack memory.
; No DBG_VALUE emitted for the incoming argument in this case and it should be fixed.
-define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) #0 !dbg !31 {
+define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) nounwind !dbg !31 {
; CHECK-LABEL: ptr_arg_in_memory:
; CHECK: .Lfunc_begin2:
; CHECK-NEXT: .loc 1 15 0 ; example.cpp:15:0
@@ -87,9 +87,7 @@ define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) #0 !dbg !31 {
ret void, !dbg !36
}
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7}
diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
index 70622706789331..3382371566884d 100644
--- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
@@ -65,10 +65,8 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
ret ptr addrspace(3) %masked
}
-declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
-declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) nounwind readnone speculatable willreturn
+declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) nounwind readnone speculatable willreturn
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/pv-packing.ll b/llvm/test/CodeGen/AMDGPU/pv-packing.ll
index 1d6c751278c68f..c2d67dc6f3120f 100644
--- a/llvm/test/CodeGen/AMDGPU/pv-packing.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv-packing.ll
@@ -53,9 +53,7 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll
index 856457aa58cbf2..b7c78388edbaab 100644
--- a/llvm/test/CodeGen/AMDGPU/pv.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv.ll
@@ -224,13 +224,10 @@ main_body:
ret void
}
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.r600.recipsqrt.clamped.f32(float) #0
-declare float @llvm.pow.f32(float, float) #0
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
+declare float @llvm.pow.f32(float, float) nounwind readnone
+declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
index e6c068f5c5b12d..6b7f05ef8f7a5e 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
@@ -17,12 +17,10 @@
; CHECK: Other: 0
; CHECK: Section: .text (0x2)
; CHECK: }
-define amdgpu_kernel void @test_constant_array_fixup(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @test_constant_array_fixup(ptr addrspace(1) nocapture %out, i32 %idx) nounwind {
entry:
%arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(4) @arr, i32 0, i32 %idx
%val = load i32, ptr addrspace(4) %arrayidx
store i32 %val, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll b/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
index 54cc43ce36b9fc..05c84e1d5ad706 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
@@ -45,14 +45,12 @@ main_body:
}
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.cube(<4 x float>) #0
+declare <4 x float> @llvm.r600.cube(<4 x float>) nounwind readnone
; Function Attrs: readnone
-declare float @fabsf(float) #0
+declare float @fabsf(float) nounwind readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
-declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/r600.add.ll b/llvm/test/CodeGen/AMDGPU/r600.add.ll
index efa3b80faccc4a..6f60b60b9887ed 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.add.ll
@@ -2,7 +2,7 @@
; FUNC-LABEL: {{^}}s_add_i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%a = load i32, ptr addrspace(1) %in
%b = load i32, ptr addrspace(1) %b_ptr
@@ -78,7 +78,7 @@ entry:
}
; FUNC-LABEL: {{^}}v_add_i32:
-define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
@@ -90,7 +90,7 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
}
; FUNC-LABEL: {{^}}v_add_imm_i32:
-define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
@@ -161,7 +161,4 @@ endif:
ret void
}
-declare i32 @llvm.r600.read.tidig.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
index 52b0eaf23ef2b8..275b336002c11a 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -5,7 +5,7 @@
; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
define amdgpu_kernel void @tgid_x(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tgid.x() #0
+ %0 = call i32 @llvm.r600.read.tgid.x() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -15,7 +15,7 @@ entry:
; EG: MOV [[REG]].X, T1.Y
define amdgpu_kernel void @tgid_y(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tgid.y() #0
+ %0 = call i32 @llvm.r600.read.tgid.y() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -25,7 +25,7 @@ entry:
; EG: MOV [[REG]].X, T1.Z
define amdgpu_kernel void @tgid_z(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tgid.z() #0
+ %0 = call i32 @llvm.r600.read.tgid.z() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -34,7 +34,7 @@ entry:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
define amdgpu_kernel void @tidig_x(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tidig.x() #0
+ %0 = call i32 @llvm.r600.read.tidig.x() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -44,7 +44,7 @@ entry:
; EG: MOV [[REG]].X, T0.Y
define amdgpu_kernel void @tidig_y(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tidig.y() #0
+ %0 = call i32 @llvm.r600.read.tidig.y() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -54,7 +54,7 @@ entry:
; EG: MOV [[REG]].X, T0.Z
define amdgpu_kernel void @tidig_z(ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.r600.read.tidig.z() #0
+ %0 = call i32 @llvm.r600.read.tidig.z() readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -84,14 +84,12 @@ define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1
ret void
}
-declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0
+declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() readnone
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
+declare i32 @llvm.r600.read.tgid.x() readnone
+declare i32 @llvm.r600.read.tgid.y() readnone
+declare i32 @llvm.r600.read.tgid.z() readnone
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
-
-attributes #0 = { readnone }
+declare i32 @llvm.r600.read.tidig.x() readnone
+declare i32 @llvm.r600.read.tidig.y() readnone
+declare i32 @llvm.r600.read.tidig.z() readnone
diff --git a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
index ce46e742ff4b12..66bc4a1410abef 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
@@ -2,7 +2,7 @@
; GCN-LABEL: {{^}}rcp_uint:
; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_uint(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @rcp_uint(ptr addrspace(1) %in, ptr addrspace(1) %out) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%load = load i32, ptr addrspace(1) %in, align 4
%cvt = uitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -12,7 +12,7 @@ define amdgpu_kernel void @rcp_uint(ptr addrspace(1) %in, ptr addrspace(1) %out)
; GCN-LABEL: {{^}}rcp_sint:
; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_sint(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @rcp_sint(ptr addrspace(1) %in, ptr addrspace(1) %out) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%load = load i32, ptr addrspace(1) %in, align 4
%cvt = sitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt, !fpmath !0
@@ -22,7 +22,7 @@ define amdgpu_kernel void @rcp_sint(ptr addrspace(1) %in, ptr addrspace(1) %out)
; GCN-LABEL: {{^}}rcp_uint_denorm:
; GCN-NOT: v_rcp_iflag_f32
-define amdgpu_kernel void @rcp_uint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_uint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) "denormal-fp-math-f32"="ieee,ieee" {
%load = load i32, ptr addrspace(1) %in, align 4
%cvt = uitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt
@@ -32,7 +32,7 @@ define amdgpu_kernel void @rcp_uint_denorm(ptr addrspace(1) %in, ptr addrspace(1
; GCN-LABEL: {{^}}rcp_sint_denorm:
; GCN-NOT: v_rcp_iflag_f32
-define amdgpu_kernel void @rcp_sint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @rcp_sint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) "denormal-fp-math-f32"="ieee,ieee" {
%load = load i32, ptr addrspace(1) %in, align 4
%cvt = sitofp i32 %load to float
%div = fdiv float 1.000000e+00, %cvt
@@ -41,6 +41,3 @@ define amdgpu_kernel void @rcp_sint_denorm(ptr addrspace(1) %in, ptr addrspace(1
}
!0 = !{float 2.500000e+00}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/read_register.ll b/llvm/test/CodeGen/AMDGPU/read_register.ll
index 227a08ecc4dbe2..56c4a8e4b5e96f 100644
--- a/llvm/test/CodeGen/AMDGPU/read_register.ll
+++ b/llvm/test/CodeGen/AMDGPU/read_register.ll
@@ -1,14 +1,14 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
-declare i32 @llvm.read_register.i32(metadata) #0
-declare i64 @llvm.read_register.i64(metadata) #0
+declare i32 @llvm.read_register.i32(metadata) nounwind
+declare i64 @llvm.read_register.i64(metadata) nounwind
; CHECK-LABEL: {{^}}test_read_m0:
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[COPY]]
-define amdgpu_kernel void @test_read_m0(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_m0(ptr addrspace(1) %out) nounwind {
store volatile i32 0, ptr addrspace(3) undef
%m0 = call i32 @llvm.read_register.i32(metadata !0)
store i32 %m0, ptr addrspace(1) %out
@@ -19,7 +19,7 @@ define amdgpu_kernel void @test_read_m0(ptr addrspace(1) %out) #0 {
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], exec_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], exec_hi
; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_read_exec(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_exec(ptr addrspace(1) %out) nounwind {
%exec = call i64 @llvm.read_register.i64(metadata !1)
store i64 %exec, ptr addrspace(1) %out
ret void
@@ -29,7 +29,7 @@ define amdgpu_kernel void @test_read_exec(ptr addrspace(1) %out) #0 {
; CHECK: v_mov_b32_e32 v[[LO:[0-9]+]], flat_scratch_lo
; CHECK: v_mov_b32_e32 v[[HI:[0-9]+]], flat_scratch_hi
; CHECK: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_read_flat_scratch(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch(ptr addrspace(1) %out) nounwind {
%flat_scratch = call i64 @llvm.read_register.i64(metadata !2)
store i64 %flat_scratch, ptr addrspace(1) %out
ret void
@@ -38,7 +38,7 @@ define amdgpu_kernel void @test_read_flat_scratch(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: {{^}}test_read_flat_scratch_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_lo
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[COPY]]
-define amdgpu_kernel void @test_read_flat_scratch_lo(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch_lo(ptr addrspace(1) %out) nounwind {
%flat_scratch_lo = call i32 @llvm.read_register.i32(metadata !3)
store i32 %flat_scratch_lo, ptr addrspace(1) %out
ret void
@@ -47,7 +47,7 @@ define amdgpu_kernel void @test_read_flat_scratch_lo(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: {{^}}test_read_flat_scratch_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], flat_scratch_hi
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[COPY]]
-define amdgpu_kernel void @test_read_flat_scratch_hi(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_flat_scratch_hi(ptr addrspace(1) %out) nounwind {
%flat_scratch_hi = call i32 @llvm.read_register.i32(metadata !4)
store i32 %flat_scratch_hi, ptr addrspace(1) %out
ret void
@@ -56,7 +56,7 @@ define amdgpu_kernel void @test_read_flat_scratch_hi(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: {{^}}test_read_exec_lo:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_lo
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[COPY]]
-define amdgpu_kernel void @test_read_exec_lo(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_exec_lo(ptr addrspace(1) %out) nounwind {
%exec_lo = call i32 @llvm.read_register.i32(metadata !5)
store i32 %exec_lo, ptr addrspace(1) %out
ret void
@@ -65,14 +65,12 @@ define amdgpu_kernel void @test_read_exec_lo(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: {{^}}test_read_exec_hi:
; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], exec_hi
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[COPY]]
-define amdgpu_kernel void @test_read_exec_hi(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_read_exec_hi(ptr addrspace(1) %out) nounwind {
%exec_hi = call i32 @llvm.read_register.i32(metadata !6)
store i32 %exec_hi, ptr addrspace(1) %out
ret void
}
-attributes #0 = { nounwind }
-
!0 = !{!"m0"}
!1 = !{!"exec"}
!2 = !{!"flat_scratch"}
diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
index fd422b344d8343..9cccb8580d2574 100644
--- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -11,7 +11,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
-declare i64 @llvm.readcyclecounter() #0
+declare i64 @llvm.readcyclecounter() nounwind
; GCN-LABEL: {{^}}test_readcyclecounter:
; MEMTIME-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
@@ -37,7 +37,7 @@ declare i64 @llvm.readcyclecounter() #0
; GETREG: v_mov_b32_e32 v[[VCNT2:[0-9]+]], [[CNT2]]
; GETREG: global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT2]]:[[ZERO]]]
-define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) nounwind {
%cycle0 = call i64 @llvm.readcyclecounter()
store volatile i64 %cycle0, ptr addrspace(1) %out
@@ -57,12 +57,10 @@ define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 {
; GETREG-DAG: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20)
; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]]
; GFX12: s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
-define amdgpu_cs i32 @test_readcyclecounter_smem(ptr addrspace(4) inreg %in) #0 {
+define amdgpu_cs i32 @test_readcyclecounter_smem(ptr addrspace(4) inreg %in) nounwind {
%cycle0 = call i64 @llvm.readcyclecounter()
%in.v = load i64, ptr addrspace(4) %in
%r.64 = add i64 %cycle0, %in.v
%r.32 = trunc i64 %r.64 to i32
ret i32 %r.32
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
index 15f664c98182ae..8adef92dfda68d 100644
--- a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
@@ -4,7 +4,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
-declare i64 @llvm.readsteadycounter() #0
+declare i64 @llvm.readsteadycounter() nounwind
; GCN-LABEL: {{^}}test_readsteadycounter:
; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
@@ -12,7 +12,7 @@ declare i64 @llvm.readsteadycounter() #0
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
-define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) nounwind {
%cycle0 = call i64 @llvm.readsteadycounter()
store volatile i64 %cycle0, ptr addrspace(1) %out
@@ -20,5 +20,3 @@ define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 {
store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 77e1694dbe7e19..5c42c95c7ae604 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -3217,7 +3217,7 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) {
ret i64 %add
}
-define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %i21, ptr addrspace(1) nocapture noundef writeonly align 4 %arg, i32 noundef %arg1) #1 {
+define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) %i21, ptr addrspace(1) nocapture noundef writeonly align 4 %arg, i32 noundef %arg1) mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) {
; GFX67-LABEL: compute_mad:
; GFX67: ; %bb.0: ; %bb
; GFX67-NEXT: s_load_dword s3, s[0:1], 0x6
@@ -3890,15 +3890,11 @@ entry:
ret i64 %f
}
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2
-declare i32 @llvm.amdgcn.workgroup.id.x() #2
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workitem.id.x() mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workgroup.id.x() mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.amdgcn.mul.u24(i32, i32)
declare i64 @llvm.amdgcn.mul.i24(i32, i32)
-attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
-attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) }
-attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-
!0 = !{i32 0, i32 1024}
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
index 14e0203f59d1bf..6bb79390be2ad3 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
@@ -6,7 +6,7 @@
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]]
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i64, ptr addrspace(1) %in, align 4
%and = and i64 %a, 1234567
store i64 %and, ptr addrspace(1) %out, align 8
@@ -16,7 +16,7 @@ define amdgpu_kernel void @reduce_i64_load_align_4_width_to_i32(ptr addrspace(1)
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0:
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: buffer_store_dword [[VAL]]
-define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i64, ptr addrspace(1) %in, align 4
%vec = bitcast i64 %a to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 0
@@ -27,12 +27,10 @@ define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt0(ptr addrspace(1
; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
; GCN: buffer_store_dword [[VAL]]
-define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @reduce_i64_align_4_bitcast_v2i32_elt1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%a = load i64, ptr addrspace(1) %in, align 4
%vec = bitcast i64 %a to <2 x i32>
%elt0 = extractelement <2 x i32> %vec, i32 1
store i32 %elt0, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
index 80a2aebcf75066..9bb245ac1d4d2c 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) #0 {
+define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) nounwind {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, ptr addrspace(3) %out, align 4
ret void
@@ -13,7 +13,7 @@ define amdgpu_kernel void @store_v2i32_as_v4i16_align_4(ptr addrspace(3) align 4
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) #0 {
+define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) nounwind {
%x.bc = bitcast <4 x i32> %x to <8 x i16>
store <8 x i16> %x.bc, ptr addrspace(3) %out, align 4
ret void
@@ -22,7 +22,7 @@ define amdgpu_kernel void @store_v4i32_as_v8i16_align_4(ptr addrspace(3) align 4
; GCN-LABEL: {{^}}store_v2i32_as_i64_align_4:
; GCN: s_load_dwordx2
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @store_v2i32_as_i64_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) #0 {
+define amdgpu_kernel void @store_v2i32_as_i64_align_4(ptr addrspace(3) align 4 %out, <2 x i32> %x) nounwind {
%x.bc = bitcast <2 x i32> %x to <4 x i16>
store <4 x i16> %x.bc, ptr addrspace(3) %out, align 4
ret void
@@ -32,7 +32,7 @@ define amdgpu_kernel void @store_v2i32_as_i64_align_4(ptr addrspace(3) align 4 %
; GCN: s_load_dwordx4
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) #0 {
+define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(ptr addrspace(3) align 4 %out, <4 x i32> %x) nounwind {
%x.bc = bitcast <4 x i32> %x to <2 x i64>
store <2 x i64> %x.bc, ptr addrspace(3) %out, align 4
ret void
@@ -44,10 +44,8 @@ define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(ptr addrspace(3) align 4
; GCN-NOT: {{buffer|flat|global}}
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
-define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(ptr addrspace(3) align 4 %out, <4 x i16> %x) #0 {
+define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(ptr addrspace(3) align 4 %out, <4 x i16> %x) nounwind {
%x.bc = bitcast <4 x i16> %x to <2 x i32>
store <2 x i32> %x.bc, ptr addrspace(3) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
index 1f0e8c65a15e46..522cd000901350 100644
--- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
@@ -4,9 +4,9 @@
; The register coalescer introduces a verifier error which later
; results in a crash during scheduling.
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 {
+define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) nounwind {
bb:
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%cmp0 = icmp eq i32 %id.x, 0
@@ -38,6 +38,3 @@ bb15: ; preds = %bb4
bb16: ; preds = %bb15, %bb4
unreachable
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
index 09be927dc952e2..109875147332da 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-fail-unsatisfiable-overlapping-tuple-hints.mir
@@ -17,11 +17,9 @@
# can only be corrected by adjusting to a non-overlapping register.
--- |
- define void @recolor_impossible_hint() #0 {
+ define void @recolor_impossible_hint() "amdgpu-waves-per-eu"="10,10" {
ret void
}
-
- attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
---
---
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
index 447a8bf9956f3e..c1c697422c1b35 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -12,12 +12,10 @@
# subregister over the same range.
--- |
- define void @foo() #0 {
+ define void @foo() "amdgpu-waves-per-eu"="8,8" {
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
-
...
---
name: foo
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll b/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll
index 1b6e88524e969d..24e21c31664bfa 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-illegal-eviction-assert.ll
@@ -13,7 +13,7 @@
; CHECK: ; def v[4:19] v[20:27] v[0:4] v[0:3] a[0:15]
; CHECK: ; clobber
; CHECK: ; use v[4:19] v[20:27] v[0:4] v[0:3] a[1:16]
-define void @illegal_eviction_assert(ptr addrspace(1) %arg) #0 {
+define void @illegal_eviction_assert(ptr addrspace(1) %arg) "amdgpu-waves-per-eu"="8,8" {
;%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,={a[0:15]}"()
%vgpr0 = extractvalue %asm.output %asm, 0
@@ -25,5 +25,3 @@ define void @illegal_eviction_assert(ptr addrspace(1) %arg) #0 {
call void asm sideeffect "; use $0 $1 $2 $3 $4","v,v,v,v,{a[1:16]}"(<16 x i32> %vgpr0, <8 x i32> %vgpr1, <5 x i32> %vgpr2, <4 x i32> %vgpr3, <16 x i32> %agpr0)
ret void
}
-
-attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
index e4cbae66d47fa8..1b609f20b45a1a 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
@@ -8,12 +8,10 @@
--- |
- define amdgpu_kernel void @regalloc_introduces_s_to_a_copy() #0 {
+ define amdgpu_kernel void @regalloc_introduces_s_to_a_copy() "amdgpu-waves-per-eu"="7,7" {
ret void
}
- attributes #0 = { "amdgpu-waves-per-eu"="7,7" }
-
...
---
name: regalloc_introduces_s_to_a_copy
diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index 04e995b6f343ef..51c66d22c1144f 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -12,7 +12,7 @@
; CHECK: error: ran out of registers during register allocation
; CHECK: Bad machine code: Using an undefined physical register
-define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 {
+define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) "amdgpu-waves-per-eu"="10,10" {
%agpr0 = call float asm sideeffect "; def $0", "=${a0}"()
%agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0
%mfma0 = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr.vec, i32 0, i32 0, i32 0)
@@ -24,9 +24,5 @@ define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1)
ret void
}
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-attributes #1 = { convergent nounwind readnone willreturn }
-attributes #2 = { nounwind readnone speculatable willreturn }
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
index 0c67f00d7bebf7..b08fedf5c3f3bb 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
@@ -28,7 +28,7 @@
; EXTIMG: define <4 x float> @needs_extimg
; NOEXTIMG-NOT: define <4 x float> @needs_extimg
-define <4 x float> @needs_extimg(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4 x i32> noundef %3) #0 {
+define <4 x float> @needs_extimg(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4 x i32> noundef %3) "target-features"="+extended-image-insts" {
%5 = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 noundef 15, float %0, float %1, <8 x i32> %2, <4 x i32> %3, i1 noundef false, i32 noundef 0, i32 noundef 0)
ret <4 x float> %5
}
@@ -43,5 +43,3 @@ define void @caller(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4
}
declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
-
-attributes #0 = { "target-features"="+extended-image-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll
index a0380c82d9aaf0..4ab08830ea7442 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll
@@ -114,7 +114,7 @@
; GFX7: @ConstantExpr = internal global i64 0
@ConstantExpr = internal global i64 ptrtoint (ptr @needs_dpp to i64)
-define void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #0 {
+define void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dpp" {
; GFX7-NOT: define void @needs_dpp(
; GFX8: define void @needs_dpp(
; GFX9: define void @needs_dpp(
@@ -138,7 +138,7 @@ endif:
ret void
}
-define void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #1 {
+define void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+16-bit-insts" {
; GFX7-NOT: define void @needs_16bit_insts(
; GFX8: define void @needs_16bit_insts(
; GFX9: define void @needs_16bit_insts(
@@ -162,7 +162,7 @@ endif:
ret void
}
-define void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #2 {
+define void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+gfx8-insts" {
; GFX7-NOT: define void @needs_gfx8_insts(
; GFX8: define void @needs_gfx8_insts(
; GFX9: define void @needs_gfx8_insts(
@@ -186,7 +186,7 @@ endif:
ret void
}
-define void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #3 {
+define void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+gfx9-insts" {
; GFX7-NOT: define void @needs_gfx9_insts(
; GFX8-NOT: define void @needs_gfx9_insts(
; GFX9: define void @needs_gfx9_insts(
@@ -210,7 +210,7 @@ endif:
ret void
}
-define void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #4 {
+define void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+gfx10-insts" {
; GFX7-NOT: define void @needs_gfx10_insts(
; GFX8-NOT: define void @needs_gfx10_insts(
; GFX9-NOT: define void @needs_gfx10_insts(
@@ -234,7 +234,7 @@ endif:
ret void
}
-define void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #5 {
+define void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+gfx11-insts" {
; GFX7-NOT: define void @needs_gfx11_insts(
; GFX8-NOT: define void @needs_gfx11_insts(
; GFX9-NOT: define void @needs_gfx11_insts(
@@ -258,7 +258,7 @@ endif:
ret void
}
-define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #6 {
+define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot1-insts" {
; GFX7-NOT: define void @needs_dot1_insts(
; GFX8-NOT: define void @needs_dot1_insts(
; GFX9: define void @needs_dot1_insts(
@@ -269,7 +269,7 @@ define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #6 {
ret void
}
-define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #7 {
+define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot2-insts" {
; GFX7-NOT: define void @needs_dot2_insts(
; GFX8-NOT: define void @needs_dot2_insts(
; GFX9: define void @needs_dot2_insts(
@@ -280,7 +280,7 @@ define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #7 {
ret void
}
-define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #8 {
+define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot3-insts" {
; GFX7-NOT: define void @needs_dot3_insts(
; GFX8-NOT: define void @needs_dot3_insts(
; GFX906-NOT: define void @needs_dot3_insts(
@@ -293,7 +293,7 @@ define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #8 {
}
-define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #9 {
+define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot4-insts" {
; GFX7-NOT: define void @needs_dot4_insts(
; GFX8-NOT: define void @needs_dot4_insts(
; GFX906-NOT: define void @needs_dot4_insts(
@@ -305,7 +305,7 @@ define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #9 {
ret void
}
-define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #10 {
+define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot5-insts" {
; GFX7-NOT: define void @needs_dot5_insts(
; GFX8-NOT: define void @needs_dot5_insts(
; GFX906-NOT: define void @needs_dot5_insts(
@@ -317,7 +317,7 @@ define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #10 {
ret void
}
-define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #11 {
+define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot6-insts" {
; GFX7-NOT: define void @needs_dot6_insts(
; GFX8-NOT: define void @needs_dot6_insts(
; GFX906-NOT: define void @needs_dot6_insts(
@@ -329,7 +329,7 @@ define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #11 {
ret void
}
-define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #12 {
+define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot7-insts" {
; GFX7-NOT: define void @needs_dot7_insts(
; GFX8-NOT: define void @needs_dot7_insts(
; GFX9: define void @needs_dot7_insts(
@@ -340,7 +340,7 @@ define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #12 {
ret void
}
-define void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #13 {
+define void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) "target-features"="+dot8-insts" {
; GFX7-NOT: define void @needs_dot8_insts(
; GFX8-NOT: define void @needs_dot8_insts(
; GFX9-NOT: define void @needs_dot8_insts(
@@ -444,18 +444,3 @@ define void @caller(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) {
; IR: ret void
ret void
}
-
-attributes #0 = { "target-features"="+dpp" }
-attributes #1 = { "target-features"="+16-bit-insts" }
-attributes #2 = { "target-features"="+gfx8-insts" }
-attributes #3 = { "target-features"="+gfx9-insts" }
-attributes #4 = { "target-features"="+gfx10-insts" }
-attributes #5 = { "target-features"="+gfx11-insts" }
-attributes #6 = { "target-features"="+dot1-insts" }
-attributes #7 = { "target-features"="+dot2-insts" }
-attributes #8 = { "target-features"="+dot3-insts" }
-attributes #9 = { "target-features"="+dot4-insts" }
-attributes #10 = { "target-features"="+dot5-insts" }
-attributes #11 = { "target-features"="+dot6-insts" }
-attributes #12 = { "target-features"="+dot7-insts" }
-attributes #13 = { "target-features"="+dot8-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
index 594fad389b6b97..85074cbd0ff6d4 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
@@ -30,7 +30,7 @@
; COMPATIBLE: define void @needs_gws
; INCOMPATIBLE-NOT: define void @needs_gws
-define void @needs_gws(i32 %val0, i32 %val1) #0 {
+define void @needs_gws(i32 %val0, i32 %val1) "target-features"="+gws" {
call void @llvm.amdgcn.ds.gws.init(i32 %val0, i32 %val1)
call void @llvm.amdgcn.ds.gws.barrier(i32 %val0, i32 %val1)
ret void
@@ -45,10 +45,5 @@ define void @gws_caller(i32 %val0, i32 %val1) {
ret void
}
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-
-
-attributes #0 = { "target-features"="+gws"}
-attributes #1 = { convergent inaccessiblememonly nounwind }
-attributes #2 = { convergent inaccessiblememonly nounwind writeonly }
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) convergent inaccessiblememonly nounwind
+declare void @llvm.amdgcn.ds.gws.init(i32, i32) convergent inaccessiblememonly nounwind writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
index 2c2401f120cf5e..827d56b27e6668 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
@@ -34,7 +34,7 @@
; REALTIME: define i64 @needs_s_memrealtime
; NOREALTIME-NOT: define i64 @needs_s_memrealtime
-define i64 @needs_s_memrealtime() #0 {
+define i64 @needs_s_memrealtime() "target-features"="+s-memrealtime" {
%t = tail call i64 @llvm.amdgcn.s.memrealtime()
ret i64 %t
}
@@ -50,7 +50,7 @@ define i64 @s_memrealtime_caller() {
; MEMTIME: define i64 @needs_s_memtime
; NOMEMTIME-NOT: define i64 @needs_s_memtime
-define i64 @needs_s_memtime() #1 {
+define i64 @needs_s_memtime() "target-features"="+s-memtime-inst" {
%t = tail call i64 @llvm.amdgcn.s.memtime()
ret i64 %t
}
@@ -67,6 +67,3 @@ define i64 @s_memtime_caller() {
declare i64 @llvm.amdgcn.s.memrealtime()
declare i64 @llvm.amdgcn.s.memtime()
-
-attributes #0 = { "target-features"="+s-memrealtime"}
-attributes #1 = { "target-features"="+s-memtime-inst"}
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
index 8ef1d3ff27e51d..60b7088cbaef75 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
@@ -22,7 +22,7 @@
; WARN-GFX90A: removing function 'needs_wavefrontsize32': +wavefrontsize32 is not supported on the current target
; WARN-GFX90A-NOT: not supported
-define void @needs_wavefrontsize32(ptr %out) #0 {
+define void @needs_wavefrontsize32(ptr %out) "target-features"="+wavefrontsize32" {
; GFX906-NOT: @needs_wavefrontsize32
; GFX90A-NOT: @needs_wavefrontsize32
; GFX10: define void @needs_wavefrontsize32(
@@ -46,5 +46,3 @@ declare i32 @llvm.read_register.i32(metadata)
declare i32 @llvm.ctpop.i32(i32)
!0 = !{!"exec_lo"}
-
-attributes #0 = { "target-features"="+wavefrontsize32" }
diff --git a/llvm/test/CodeGen/AMDGPU/repeated-divisor.ll b/llvm/test/CodeGen/AMDGPU/repeated-divisor.ll
index e5f8de6bd521db..81e6811320880f 100644
--- a/llvm/test/CodeGen/AMDGPU/repeated-divisor.ll
+++ b/llvm/test/CodeGen/AMDGPU/repeated-divisor.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-define <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) #0 {
+define <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f32_x2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -101,7 +101,7 @@ define <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) #0 {
ret <2 x float> %insert.1
}
-define <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) #0 {
+define <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -168,7 +168,7 @@ define <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) #
ret <2 x float> %insert.1
}
-define <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %D) #1 {
+define <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %D) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -241,7 +241,7 @@ define <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %
ret <2 x float> %insert.1
}
-define <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) #0 {
+define <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -292,7 +292,7 @@ define <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) #0 {
ret <2 x half> %insert.1
}
-define <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %D) #0 {
+define <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -363,7 +363,7 @@ define <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %
ret <2 x double> %insert.1
}
-define <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, float %D) #0 {
+define <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, float %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -435,7 +435,7 @@ define <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, f
ret <3 x float> %insert.2
}
-define <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, float %w, float %D) #0 {
+define <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, float %w, float %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f32_x4_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -512,7 +512,7 @@ define <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, f
ret <4 x float> %insert.3
}
-define <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half %D) #0 {
+define <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f16_x3_arcp:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -570,7 +570,7 @@ define <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half
ret <3 x half> %insert.2
}
-define <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2 x float> %D) #0 {
+define <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2 x float> %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_v2f32_x2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -677,7 +677,7 @@ define <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2
ret <4 x float> %shuffle
}
-define <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D) #0 {
+define <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_f32_x2_ulp25:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -726,7 +726,7 @@ define <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D)
ret <2 x float> %insert.1
}
-define <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float %D) #1 {
+define <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float %D) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
; GFX6-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -758,7 +758,7 @@ define <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float
ret <2 x float> %insert.1
}
-define <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x half> %D) #0 {
+define <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x half> %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_v2f16_x2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -830,7 +830,7 @@ define <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x
ret <4 x half> %shuffle
}
-define <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x half> %D) #0 {
+define <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x half> %D) "denormal-fp-math-f32"="ieee,ieee" {
; GFX6-LABEL: v_repeat_divisor_v3f16_x2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -936,9 +936,6 @@ define <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x
ret <6 x half> %shuffle
}
-attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
-attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-
!0 = !{float 2.5}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 8c584a1890c9df..004a86f3a1f2ae 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -5,7 +5,7 @@ target datalayout = "n32"
; CHECK-LABEL: @invalid_reqd_work_group_size(
; CHECK: load i16,
-define amdgpu_kernel void @invalid_reqd_work_group_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !1 {
+define amdgpu_kernel void @invalid_reqd_work_group_size(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !1 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -15,7 +15,7 @@ define amdgpu_kernel void @invalid_reqd_work_group_size(ptr addrspace(1) %out) #
; CHECK-LABEL: @volatile_load_group_size_x(
; CHECK: load volatile i16,
-define amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load volatile i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -25,7 +25,7 @@ define amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) #0
; CHECK-LABEL: @load_group_size_x(
; CHECK: store i16 %group.size.x,
-define amdgpu_kernel void @load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @load_group_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -35,7 +35,7 @@ define amdgpu_kernel void @load_group_size_x(ptr addrspace(1) %out) #0 !reqd_wor
; CHECK-LABEL: @load_group_size_y(
; CHECK: store i16 %group.size.y,
-define amdgpu_kernel void @load_group_size_y(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @load_group_size_y(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 6
%group.size.y = load i16, ptr addrspace(4) %gep.group.size.y, align 4
@@ -45,7 +45,7 @@ define amdgpu_kernel void @load_group_size_y(ptr addrspace(1) %out) #0 !reqd_wor
; CHECK-LABEL: @load_group_size_z(
; CHECK: store i16 %group.size.z,
-define amdgpu_kernel void @load_group_size_z(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @load_group_size_z(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.z = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 8
%group.size.z = load i16, ptr addrspace(4) %gep.group.size.z, align 4
@@ -56,7 +56,7 @@ define amdgpu_kernel void @load_group_size_z(ptr addrspace(1) %out) #0 !reqd_wor
; Metadata uses i64 instead of i32
; CHECK-LABEL: @load_group_size_x_reqd_work_group_size_i64(
; CHECK: store i16 %group.size.x,
-define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i64(ptr addrspace(1) %out) #0 !reqd_work_group_size !2 {
+define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i64(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !2 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -67,7 +67,7 @@ define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i64(ptr addrsp
; Metadata uses i16 instead of i32
; CHECK-LABEL: @load_group_size_x_reqd_work_group_size_i16(
; CHECK: store i16 %group.size.x,
-define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i16(ptr addrspace(1) %out) #0 !reqd_work_group_size !3 {
+define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i16(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !3 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -77,7 +77,7 @@ define amdgpu_kernel void @load_group_size_x_reqd_work_group_size_i16(ptr addrsp
; CHECK-LABEL: @use_local_size_x_8_16_2(
; CHECK: store i64 %zext,
-define amdgpu_kernel void @use_local_size_x_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @use_local_size_x_8_16_2(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -95,7 +95,7 @@ define amdgpu_kernel void @use_local_size_x_8_16_2(ptr addrspace(1) %out) #0 !re
; CHECK-LABEL: @use_local_size_y_8_16_2(
; CHECK: store i64 %zext,
-define amdgpu_kernel void @use_local_size_y_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @use_local_size_y_8_16_2(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.y = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 6
%group.size.y = load i16, ptr addrspace(4) %gep.group.size.y, align 4
@@ -113,7 +113,7 @@ define amdgpu_kernel void @use_local_size_y_8_16_2(ptr addrspace(1) %out) #0 !re
; CHECK-LABEL: @use_local_size_z_8_16_2(
; CHECK: store i64 %zext,
-define amdgpu_kernel void @use_local_size_z_8_16_2(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @use_local_size_z_8_16_2(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.z = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 8
%group.size.z = load i16, ptr addrspace(4) %gep.group.size.z, align 4
@@ -135,7 +135,7 @@ define amdgpu_kernel void @use_local_size_z_8_16_2(ptr addrspace(1) %out) #0 !re
; CHECK-LABEL: @local_size_x_8_16_2_wrong_group_id(
; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y()
; CHECK: %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext
-define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -155,7 +155,7 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(ptr addrspace(1) %
; CHECK: %grid.size.x = load i32, ptr addrspace(4) %gep.grid.size.x, align 4
; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
; CHECK: %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext
- define amdgpu_kernel void @local_size_x_8_16_2_wrong_grid_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+ define amdgpu_kernel void @local_size_x_8_16_2_wrong_grid_size(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -177,7 +177,7 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(ptr addrspace(1) %
; CHECK: %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext
; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x
; CHECK: %smin = call i32 @llvm.smin.i32(i32 %sub, i32 %group.size.x.zext)
-define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -198,7 +198,7 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(ptr addrspace(1) %
; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x
; CHECK: %umax = call i32 @llvm.umax.i32(i32 %sub, i32 %group.size.x.zext)
; CHECK: %zext = zext i32 %umax to i64
-define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -220,7 +220,7 @@ define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(ptr addrspace(1) %ou
; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
; CHECK: %group.id_x_group.size.x = mul i32 %group.id, %group.size.x.zext
; CHECK: %sub = sub i32 %grid.size.x.zext, %group.id_x_group.size.x
-define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -239,7 +239,7 @@ define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(ptr addr
; CHECK-LABEL: @func_group_size_x(
; CHECK: ret i32 %zext
-define i32 @func_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define i32 @func_group_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -249,9 +249,9 @@ define i32 @func_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0
; CHECK-LABEL: @__ockl_get_local_size_reqd_size(
; CHECK: %group.size = phi i16 [ %tmp24, %bb17 ], [ %tmp16, %bb9 ], [ %tmp8, %bb1 ], [ 1, %bb ]
-define i64 @__ockl_get_local_size_reqd_size(i32 %arg) #1 !reqd_work_group_size !0 {
+define i64 @__ockl_get_local_size_reqd_size(i32 %arg) nounwind readnone speculatable !reqd_work_group_size !0 {
bb:
- %tmp = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2
+ %tmp = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind "uniform-work-group-size"="true"
switch i32 %arg, label %bb25 [
i32 0, label %bb1
i32 1, label %bb9
@@ -298,9 +298,9 @@ bb25: ; preds = %bb17, %bb9, %bb1, %
; CHECK: store volatile i64 %tmp34.i, ptr addrspace(1) %out, align 4
; CHECK-NEXT: store volatile i64 %tmp34.i14, ptr addrspace(1) %out, align 4
; CHECK-NEXT: store volatile i64 %tmp34.i7, ptr addrspace(1) %out, align 4
-define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %out) #0 !reqd_work_group_size !0 {
- %tmp.i = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
- %tmp2.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #0
+define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
+ %tmp.i = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind "uniform-work-group-size"="true"
+ %tmp2.i = tail call i32 @llvm.amdgcn.workgroup.id.x() nounwind "uniform-work-group-size"="true"
%tmp3.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 12
%tmp5.i = load i32, ptr addrspace(4) %tmp3.i, align 4
%tmp6.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 4
@@ -310,7 +310,7 @@ define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %o
%tmp31.i = sub i32 %tmp5.i, %tmp30.i
%umin0 = call i32 @llvm.umin.i32(i32 %tmp31.i, i32 %tmp29.i)
%tmp34.i = zext i32 %umin0 to i64
- %tmp10.i = tail call i32 @llvm.amdgcn.workgroup.id.y() #0
+ %tmp10.i = tail call i32 @llvm.amdgcn.workgroup.id.y() nounwind "uniform-work-group-size"="true"
%tmp11.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 16
%tmp13.i = load i32, ptr addrspace(4) %tmp11.i, align 8
%tmp14.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 6
@@ -320,7 +320,7 @@ define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %o
%tmp31.i11 = sub i32 %tmp13.i, %tmp30.i10
%umin1 = call i32 @llvm.umin.i32(i32 %tmp31.i11, i32 %tmp29.i9)
%tmp34.i14 = zext i32 %umin1 to i64
- %tmp18.i = tail call i32 @llvm.amdgcn.workgroup.id.z() #0
+ %tmp18.i = tail call i32 @llvm.amdgcn.workgroup.id.z() nounwind "uniform-work-group-size"="true"
%tmp19.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 20
%tmp21.i = load i32, ptr addrspace(4) %tmp19.i, align 4
%tmp22.i = getelementptr inbounds i8, ptr addrspace(4) %tmp.i, i64 8
@@ -342,7 +342,7 @@ define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %o
; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 4
; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1
-define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 1
@@ -355,7 +355,7 @@ define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) #0 !
; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 2
; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1
-define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call align 2 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 1
@@ -367,7 +367,7 @@ define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(ptr
; CHECK-LABEL: @load_group_size_xy_i32(
; CHECK: %group.size.xy = load i32,
; CHECK: store i32 %group.size.xy
-define amdgpu_kernel void @load_group_size_xy_i32(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @load_group_size_xy_i32(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.xy = load i32, ptr addrspace(4) %gep.group.size.x, align 4
@@ -378,7 +378,7 @@ define amdgpu_kernel void @load_group_size_xy_i32(ptr addrspace(1) %out) #0 !req
; CHECK-LABEL: @load_group_size_x_y_multiple_dispatch_ptr(
; CHECK: store volatile i16 %group.size.x, ptr addrspace(1) %out, align 2
; CHECK: store volatile i16 %group.size.y, ptr addrspace(1) %out, align 2
-define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" !reqd_work_group_size !0 {
%dispatch.ptr0 = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr0, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -398,7 +398,7 @@ define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(ptr addrspa
; CHECK-NEXT: %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
; CHECK: %group.size.x.zext = zext i16 %group.size.x to i32
; CHECK: store i64 %zext, ptr addrspace(1) %out
-define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="true" {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -416,7 +416,7 @@ define amdgpu_kernel void @use_local_size_x_uniform_work_group_size(ptr addrspac
; CHECK-LABEL: @use_local_size_x_uniform_work_group_size_false(
; CHECK: call i32 @llvm.umin
-define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(ptr addrspace(1) %out) #3 {
+define amdgpu_kernel void @use_local_size_x_uniform_work_group_size_false(ptr addrspace(1) %out) nounwind "uniform-work-group-size"="false" {
%dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
%group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
@@ -439,18 +439,13 @@ define amdgpu_kernel void @no_use_dispatch_ptr() {
ret void
}
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
-declare i32 @llvm.umin.i32(i32, i32) #1
-declare i32 @llvm.smin.i32(i32, i32) #1
-declare i32 @llvm.umax.i32(i32, i32) #1
-
-attributes #0 = { nounwind "uniform-work-group-size"="true" }
-attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nounwind "uniform-work-group-size"="true" }
-attributes #3 = { nounwind "uniform-work-group-size"="false" }
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.y() nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workgroup.id.z() nounwind readnone speculatable
+declare i32 @llvm.umin.i32(i32, i32) nounwind readnone speculatable
+declare i32 @llvm.smin.i32(i32, i32) nounwind readnone speculatable
+declare i32 @llvm.umax.i32(i32, i32) nounwind readnone speculatable
!0 = !{i32 8, i32 16, i32 2}
!1 = !{i32 8, i32 16}
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index a0528825d24f10..f09f539d027bc9 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -9,9 +9,9 @@
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v1
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) nounwind
%x = fadd float %arg3, 1.000000e+00
%a = insertvalue { float, float } undef, float %x, 0
%b = insertvalue { float, float } %a, float %arg3, 1
@@ -27,9 +27,9 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) nounwind
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}
@@ -44,7 +44,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="0" {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -69,7 +69,7 @@ bb:
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
-define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="0" {
bb:
ret float 1.000000e+00
}
@@ -83,7 +83,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN-DAG: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
+define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="0" {
bb:
%f = bitcast <2 x i32> %arg8 to <2 x float>
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
@@ -102,7 +102,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="1" {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -131,7 +131,7 @@ bb:
; GCN-DAG: v_mov_b32_e32 v3, v8
; GCN-DAG: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="119" {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -160,7 +160,7 @@ bb:
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
-define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
+define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) nounwind "InitialPSInputAddr"="418" {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
@@ -182,7 +182,7 @@ bb:
; GCN-DAG: s_mov_b32 s2, s3
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-NOT: s_endpgm
-define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
%x = add i32 %arg2, 2
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
@@ -198,7 +198,7 @@ bb:
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
-define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
%x = add i32 %arg2, 2
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
@@ -213,9 +213,9 @@ bb:
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
-define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) nounwind
%v = fadd float %arg3, 1.000000e+00
%s = add i32 %arg2, 2
%a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
@@ -235,22 +235,16 @@ bb:
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: s_waitcnt expcnt(0)
-define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
+define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) nounwind {
bb:
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) nounwind
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}
; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
; GCN: codeLenInByte = 0{{$}}
-define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
+define amdgpu_ps float @ret_return_to_epilog_pseudo_size() nounwind {
ret float undef
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "InitialPSInputAddr"="0" }
-attributes #2 = { nounwind "InitialPSInputAddr"="1" }
-attributes #3 = { nounwind "InitialPSInputAddr"="119" }
-attributes #4 = { nounwind "InitialPSInputAddr"="418" }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ad38d78ddb2ff1..81012e0486f03e 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -21,14 +21,14 @@
; GCN-NEXT: [[RET_BB]]:
; GCN-NEXT: ; return
; GCN-NEXT: .Lfunc_end0
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) "InitialPSInputAddr"="36983" {
entry:
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 1, i32 0, i32 %arg5) #2
- %p2 = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 1, i32 0, i32 %arg5) #2
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 1, i32 0, i32 %arg5) nounwind
+ %p2 = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 1, i32 0, i32 %arg5) nounwind
%p87 = fmul float %p2, %p2
%p88 = fadd float %p87, %p87
%p93 = fadd float %p88, %p88
@@ -73,14 +73,14 @@ ret.bb: ; preds = %else, %main_body
; GCN: ; %ret.bb
; GCN: store_dword
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) "InitialPSInputAddr"="36983" {
main_body:
%i.i = extractelement <2 x i32> %arg7, i32 0
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 1, i32 0, i32 %arg5) #2
- %p2 = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 1, i32 0, i32 %arg5) #2
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 1, i32 0, i32 %arg5) nounwind
+ %p2 = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 1, i32 0, i32 %arg5) nounwind
%p87 = fmul float %p2, %p2
%p88 = fadd float %p87, %p87
%p93 = fadd float %p88, %p88
@@ -108,23 +108,19 @@ ret.bb: ; preds = %else, %main_body
}
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fabs.f32(float) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #1
+declare float @llvm.sqrt.f32(float) nounwind readnone
; Function Attrs: nounwind readnone
-declare float @llvm.floor.f32(float) #1
-
-attributes #0 = { "InitialPSInputAddr"="36983" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
+declare float @llvm.floor.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
index d946d594fde2f0..29c668bce8b3e4 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@@ -1,59 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-rewrite-out-arguments < %s | FileCheck %s
-define void @no_ret_blocks() #0 {
+define void @no_ret_blocks() nounwind {
unreachable
}
-define void @void_one_out_arg_i32_no_use(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_no_use(ptr addrspace(5) %val) nounwind {
ret void
}
-define void @skip_byval_arg(ptr addrspace(5) byval(i32) %val) #0 {
+define void @skip_byval_arg(ptr addrspace(5) byval(i32) %val) nounwind {
store i32 0, ptr addrspace(5) %val
ret void
}
-define void @skip_optnone(ptr addrspace(5) byval(i32) %val) #1 {
+define void @skip_optnone(ptr addrspace(5) byval(i32) %val) nounwind noinline optnone {
store i32 0, ptr addrspace(5) %val
ret void
}
-define void @skip_volatile(ptr addrspace(5) byval(i32) %val) #0 {
+define void @skip_volatile(ptr addrspace(5) byval(i32) %val) nounwind {
store volatile i32 0, ptr addrspace(5) %val
ret void
}
-define void @skip_atomic(ptr addrspace(5) byval(i32) %val) #0 {
+define void @skip_atomic(ptr addrspace(5) byval(i32) %val) nounwind {
store atomic i32 0, ptr addrspace(5) %val seq_cst, align 4
ret void
}
-define void @skip_store_pointer_val(ptr addrspace(5) %val) #0 {
+define void @skip_store_pointer_val(ptr addrspace(5) %val) nounwind {
store ptr addrspace(5) %val, ptr poison
ret void
}
-define void @skip_store_gep(ptr addrspace(5) %val) #0 {
+define void @skip_store_gep(ptr addrspace(5) %val) nounwind {
%gep = getelementptr inbounds i32, ptr addrspace(5) %val, i32 1
store i32 0, ptr addrspace(5) %gep
ret void
}
-define void @skip_sret(ptr addrspace(5) sret(i32) %sret, ptr addrspace(5) %out) #0 {
+define void @skip_sret(ptr addrspace(5) sret(i32) %sret, ptr addrspace(5) %out) nounwind {
store i32 1, ptr addrspace(5) %sret
store i32 0, ptr addrspace(5) %out
ret void
}
-define void @void_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 0, ptr addrspace(5) %val
ret void
}
-define void @void_one_out_arg_i32_1_use_align(ptr addrspace(5) align 8 %val) #0 {
+define void @void_one_out_arg_i32_1_use_align(ptr addrspace(5) align 8 %val) nounwind {
store i32 0, ptr addrspace(5) %val, align 8
ret void
}
@@ -61,7 +61,7 @@ define void @void_one_out_arg_i32_1_use_align(ptr addrspace(5) align 8 %val) #0
-define void @void_one_out_arg_i32_2_use(i1 %arg0, ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_2_use(i1 %arg0, ptr addrspace(5) %val) nounwind {
br i1 %arg0, label %ret0, label %ret1
ret0:
@@ -76,14 +76,14 @@ ret1:
declare void @may.clobber()
-define void @void_one_out_arg_i32_2_stores(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_2_stores(ptr addrspace(5) %val) nounwind {
store i32 0, ptr addrspace(5) %val
store i32 1, ptr addrspace(5) %val
ret void
}
-define void @void_one_out_arg_i32_2_stores_clobber(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_2_stores_clobber(ptr addrspace(5) %val) nounwind {
store i32 0, ptr addrspace(5) %val
call void @may.clobber()
store i32 1, ptr addrspace(5) %val
@@ -91,26 +91,26 @@ define void @void_one_out_arg_i32_2_stores_clobber(ptr addrspace(5) %val) #0 {
}
-define void @void_one_out_arg_i32_call_may_clobber(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_call_may_clobber(ptr addrspace(5) %val) nounwind {
store i32 0, ptr addrspace(5) %val
call void @may.clobber()
ret void
}
-define void @void_one_out_arg_i32_pre_call_may_clobber(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_pre_call_may_clobber(ptr addrspace(5) %val) nounwind {
call void @may.clobber()
store i32 0, ptr addrspace(5) %val
ret void
}
-define void @void_one_out_arg_i32_reload(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_i32_reload(ptr addrspace(5) %val) nounwind {
store i32 0, ptr addrspace(5) %val
%load = load i32, ptr addrspace(5) %val, align 4
ret void
}
-define void @void_one_out_arg_i32_store_in_different_block(ptr addrspace(5) %out) #0 {
+define void @void_one_out_arg_i32_store_in_different_block(ptr addrspace(5) %out) nounwind {
%load = load i32, ptr addrspace(1) poison
store i32 0, ptr addrspace(5) %out
br label %ret
@@ -120,7 +120,7 @@ ret:
}
-define void @unused_out_arg_one_branch(i1 %arg0, ptr addrspace(5) %val) #0 {
+define void @unused_out_arg_one_branch(i1 %arg0, ptr addrspace(5) %val) nounwind {
br i1 %arg0, label %ret0, label %ret1
ret0:
@@ -132,7 +132,7 @@ ret1:
}
-define void @void_one_out_arg_v2i32_1_use(ptr addrspace(5) %val) #0 {
+define void @void_one_out_arg_v2i32_1_use(ptr addrspace(5) %val) nounwind {
store <2 x i32> <i32 17, i32 9>, ptr addrspace(5) %val
ret void
}
@@ -141,39 +141,39 @@ define void @void_one_out_arg_v2i32_1_use(ptr addrspace(5) %val) #0 {
; Normally this is split into element accesses which we don't handle.
-define void @void_one_out_arg_struct_1_use(ptr addrspace(5) %out) #0 {
+define void @void_one_out_arg_struct_1_use(ptr addrspace(5) %out) nounwind {
store %struct { i32 9, i8 99, float 4.0 }, ptr addrspace(5) %out
ret void
}
-define i32 @i32_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define i32 @i32_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 24, ptr addrspace(5) %val
ret i32 9
}
-define void @unused_different_type(ptr addrspace(5) %arg0, ptr addrspace(5) nocapture %arg1) #0 {
+define void @unused_different_type(ptr addrspace(5) %arg0, ptr addrspace(5) nocapture %arg1) nounwind {
store float 4.0, ptr addrspace(5) %arg1, align 4
ret void
}
-define void @multiple_same_return_noalias(ptr addrspace(5) noalias %out0, ptr addrspace(5) noalias %out1) #0 {
+define void @multiple_same_return_noalias(ptr addrspace(5) noalias %out0, ptr addrspace(5) noalias %out1) nounwind {
store i32 1, ptr addrspace(5) %out0, align 4
store i32 2, ptr addrspace(5) %out1, align 4
ret void
}
-define void @multiple_same_return_mayalias(ptr addrspace(5) %out0, ptr addrspace(5) %out1) #0 {
+define void @multiple_same_return_mayalias(ptr addrspace(5) %out0, ptr addrspace(5) %out1) nounwind {
store i32 1, ptr addrspace(5) %out0, align 4
store i32 2, ptr addrspace(5) %out1, align 4
ret void
}
-define void @multiple_same_return_mayalias_order(ptr addrspace(5) %out0, ptr addrspace(5) %out1) #0 {
+define void @multiple_same_return_mayalias_order(ptr addrspace(5) %out0, ptr addrspace(5) %out1) nounwind {
store i32 2, ptr addrspace(5) %out1, align 4
store i32 1, ptr addrspace(5) %out0, align 4
ret void
@@ -181,7 +181,7 @@ define void @multiple_same_return_mayalias_order(ptr addrspace(5) %out0, ptr add
; Currently this fails to convert because the store won't be found if
; it isn't in the same block as the return.
-define i32 @store_in_entry_block(i1 %arg0, ptr addrspace(5) %out) #0 {
+define i32 @store_in_entry_block(i1 %arg0, ptr addrspace(5) %out) nounwind {
entry:
%val0 = load i32, ptr addrspace(1) poison
store i32 %val0, ptr addrspace(5) %out
@@ -197,7 +197,7 @@ endif:
}
-define i1 @i1_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define i1 @i1_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 24, ptr addrspace(5) %val
ret i1 true
}
@@ -206,65 +206,65 @@ define i1 @i1_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
; incompatible with struct return types.
-define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 24, ptr addrspace(5) %val
ret i1 true
}
-define signext i1 @i1_signext_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define signext i1 @i1_signext_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 24, ptr addrspace(5) %val
ret i1 true
}
-define noalias ptr addrspace(1) @p1i32_noalias_one_out_arg_i32_1_use(ptr addrspace(5) %val) #0 {
+define noalias ptr addrspace(1) @p1i32_noalias_one_out_arg_i32_1_use(ptr addrspace(5) %val) nounwind {
store i32 24, ptr addrspace(5) %val
ret ptr addrspace(1) null
}
-define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %val) #0 {
+define void @void_one_out_non_private_arg_i32_1_use(ptr addrspace(1) %val) nounwind {
store i32 0, ptr addrspace(1) %val
ret void
}
-define void @func_ptr_type(ptr addrspace(5) %out) #0 {
+define void @func_ptr_type(ptr addrspace(5) %out) nounwind {
%func = load ptr, ptr poison
store ptr %func, ptr addrspace(5) %out
ret void
}
-define void @bitcast_func_ptr_type(ptr addrspace(5) %out) #0 {
+define void @bitcast_func_ptr_type(ptr addrspace(5) %out) nounwind {
%func = load ptr, ptr poison
store ptr %func, ptr addrspace(5) %out
ret void
}
-define void @out_arg_small_array(ptr addrspace(5) %val) #0 {
+define void @out_arg_small_array(ptr addrspace(5) %val) nounwind {
store [4 x i32] [i32 0, i32 1, i32 2, i32 3], ptr addrspace(5) %val
ret void
}
-define void @out_arg_large_array(ptr addrspace(5) %val) #0 {
+define void @out_arg_large_array(ptr addrspace(5) %val) nounwind {
store [17 x i32] zeroinitializer, ptr addrspace(5) %val
ret void
}
-define <16 x i32> @num_regs_return_limit(ptr addrspace(5) %out, i32 %val) #0 {
+define <16 x i32> @num_regs_return_limit(ptr addrspace(5) %out, i32 %val) nounwind {
%load = load volatile <16 x i32>, ptr addrspace(1) poison
store i32 %val, ptr addrspace(5) %out
ret <16 x i32> %load
}
-define [15 x i32] @num_regs_reach_limit(ptr addrspace(5) %out, i32 %val) #0 {
+define [15 x i32] @num_regs_reach_limit(ptr addrspace(5) %out, i32 %val) nounwind {
%load = load volatile [15 x i32], ptr addrspace(1) poison
store i32 %val, ptr addrspace(5) %out
ret [15 x i32] %load
}
-define [15 x i32] @num_regs_reach_limit_leftover(ptr addrspace(5) %out0, ptr addrspace(5) %out1, i32 %val0) #0 {
+define [15 x i32] @num_regs_reach_limit_leftover(ptr addrspace(5) %out0, ptr addrspace(5) %out1, i32 %val0) nounwind {
%load0 = load volatile [15 x i32], ptr addrspace(1) poison
%load1 = load volatile i32, ptr addrspace(1) poison
store i32 %val0, ptr addrspace(5) %out0
@@ -273,13 +273,13 @@ define [15 x i32] @num_regs_reach_limit_leftover(ptr addrspace(5) %out0, ptr add
}
-define void @preserve_debug_info(i32 %arg0, ptr addrspace(5) %val) #0 !dbg !5 {
+define void @preserve_debug_info(i32 %arg0, ptr addrspace(5) %val) nounwind !dbg !5 {
call void @may.clobber(), !dbg !10
store i32 %arg0, ptr addrspace(5) %val, !dbg !11
ret void, !dbg !12
}
-define void @preserve_metadata(i32 %arg0, ptr addrspace(5) %val) #0 !kernel_arg_access_qual !13 {
+define void @preserve_metadata(i32 %arg0, ptr addrspace(5) %val) nounwind !kernel_arg_access_qual !13 {
call void @may.clobber()
store i32 %arg0, ptr addrspace(5) %val
ret void
@@ -287,13 +287,13 @@ define void @preserve_metadata(i32 %arg0, ptr addrspace(5) %val) #0 !kernel_arg_
; Clang emits this pattern for 3-vectors for some reason.
-define void @bitcast_pointer_v4i32_v3i32(ptr addrspace(5) %out) #0 {
+define void @bitcast_pointer_v4i32_v3i32(ptr addrspace(5) %out) nounwind {
%load = load volatile <4 x i32>, ptr addrspace(1) poison
store <4 x i32> %load, ptr addrspace(5) %out
ret void
}
-define void @bitcast_pointer_v4i32_v3f32(ptr addrspace(5) %out) #0 {
+define void @bitcast_pointer_v4i32_v3f32(ptr addrspace(5) %out) nounwind {
%load = load volatile <4 x i32>, ptr addrspace(1) poison
store <4 x i32> %load, ptr addrspace(5) %out
ret void
@@ -304,19 +304,19 @@ define void @bitcast_pointer_v4i32_v3f32(ptr addrspace(5) %out) #0 {
; casts.
-define void @bitcast_pointer_i32_f32(ptr addrspace(5) %out) #0 {
+define void @bitcast_pointer_i32_f32(ptr addrspace(5) %out) nounwind {
%load = load volatile i32, ptr addrspace(1) poison
store i32 %load, ptr addrspace(5) %out
ret void
}
-define void @bitcast_pointer_i32_f16(ptr addrspace(5) %out) #0 {
+define void @bitcast_pointer_i32_f16(ptr addrspace(5) %out) nounwind {
%load = load volatile i32, ptr addrspace(1) poison
store i32 %load, ptr addrspace(5) %out
ret void
}
-define void @bitcast_pointer_f16_i32(ptr addrspace(5) %out) #0 {
+define void @bitcast_pointer_f16_i32(ptr addrspace(5) %out) nounwind {
%load = load volatile half, ptr addrspace(1) poison
store half %load, ptr addrspace(5) %out
ret void
@@ -329,64 +329,64 @@ define void @bitcast_pointer_f16_i32(ptr addrspace(5) %out) #0 {
%struct.v4f32 = type { <4 x float> }
-define void @bitcast_struct_v3f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 {
+define void @bitcast_struct_v3f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) nounwind {
%extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x float> %extractVec, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v3f32_v3i32(ptr addrspace(5) %out, <3 x i32> %value) #0 {
+define void @bitcast_struct_v3f32_v3i32(ptr addrspace(5) %out, <3 x i32> %value) nounwind {
%extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x i32> %extractVec, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v4f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 {
+define void @bitcast_struct_v4f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) nounwind {
store <4 x float> %value, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v3f32_v4i32(ptr addrspace(5) %out, <4 x i32> %value) #0 {
+define void @bitcast_struct_v3f32_v4i32(ptr addrspace(5) %out, <4 x i32> %value) nounwind {
store <4 x i32> %value, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v4f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 {
+define void @bitcast_struct_v4f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) nounwind {
%extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x float> %extractVec, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v3f32_v2f32(ptr addrspace(5) %out, <2 x float> %value) #0 {
+define void @bitcast_struct_v3f32_v2f32(ptr addrspace(5) %out, <2 x float> %value) nounwind {
store <2 x float> %value, ptr addrspace(5) %out, align 8
ret void
}
-define void @bitcast_struct_v3f32_f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) #0 {
+define void @bitcast_struct_v3f32_f32_v3f32(ptr addrspace(5) %out, <3 x float> %value) nounwind {
%extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x float> %extractVec, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_v3f32_f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 {
+define void @bitcast_struct_v3f32_f32_v4f32(ptr addrspace(5) %out, <4 x float> %value) nounwind {
store <4 x float> %value, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_struct_i128_v4f32(ptr addrspace(5) %out, <4 x float> %value) #0 {
+define void @bitcast_struct_i128_v4f32(ptr addrspace(5) %out, <4 x float> %value) nounwind {
store <4 x float> %value, ptr addrspace(5) %out, align 16
ret void
}
-define void @bitcast_array_v4i32_v4f32(ptr addrspace(5) %out, [4 x float] %value) #0 {
+define void @bitcast_array_v4i32_v4f32(ptr addrspace(5) %out, [4 x float] %value) nounwind {
store [4 x float] %value, ptr addrspace(5) %out, align 4
ret void
}
-define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, ptr addrspace(5) %out, <3 x float> %value) #0 {
+define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, ptr addrspace(5) %out, <3 x float> %value) nounwind {
entry:
br i1 %cond, label %ret0, label %ret1
@@ -401,15 +401,11 @@ ret1:
ret void
}
-define void @bitcast_v3f32_struct_v3f32(ptr addrspace(5) %out, %struct.v3f32 %value) #0 {
+define void @bitcast_v3f32_struct_v3f32(ptr addrspace(5) %out, %struct.v3f32 %value) nounwind {
store %struct.v3f32 %value, ptr addrspace(5) %out, align 4
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline optnone }
-attributes #2 = { alwaysinline nounwind }
-
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
index 85d0c054754d03..59f8612d1067f1 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
@@ -7,9 +7,7 @@
unreachable, !dbg !11
}
- declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
- attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+ declare void @llvm.dbg.value(metadata, metadata, metadata) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!llvm.dbg.cu = !{!0}
!llvm.mir.debugify = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-undef-for-phi.ll b/llvm/test/CodeGen/AMDGPU/rewrite-undef-for-phi.ll
index 4f6b9f474c1317..0a8073792fe1ba 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-undef-for-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-undef-for-phi.ll
@@ -2,7 +2,7 @@
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-rewrite-undef-for-phi %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -S -passes=amdgpu-rewrite-undef-for-phi %s | FileCheck -check-prefix=OPT %s
-define amdgpu_ps float @basic(float inreg %c, i32 %x) #0 {
+define amdgpu_ps float @basic(float inreg %c, i32 %x) nounwind noinline {
; OPT-LABEL: @basic(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
@@ -24,7 +24,7 @@ end:
ret float %c2
}
-define amdgpu_ps float @with_uniform_region_inside(float inreg %c, i32 inreg %d, i32 %x) #0 {
+define amdgpu_ps float @with_uniform_region_inside(float inreg %c, i32 inreg %d, i32 %x) nounwind noinline {
; OPT-LABEL: @with_uniform_region_inside(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
@@ -65,7 +65,7 @@ end:
ret float %c2
}
-define amdgpu_ps float @exclude_backedge(float inreg %c, i32 %x) #0 {
+define amdgpu_ps float @exclude_backedge(float inreg %c, i32 %x) nounwind noinline {
; OPT-LABEL: @exclude_backedge(
; OPT-NEXT: entry:
; OPT-NEXT: [[CC:%.*]] = icmp slt i32 [[X:%.*]], 0
@@ -100,5 +100,3 @@ end:
%r = phi float [ %c2, %loop_end ], [ %c, %entry ]
ret float %r
}
-
-attributes #0 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 528dcfc6bc0382..a7dc8ef7ee59ed 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1273,19 +1273,17 @@ define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
ret <2 x double> %roundeven
}
-declare half @llvm.roundeven.f16(half) #0
-declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
-declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
+declare half @llvm.roundeven.f16(half) nounwind readnone speculatable willreturn
+declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) nounwind readnone speculatable willreturn
+declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) nounwind readnone speculatable willreturn
-declare float @llvm.roundeven.f32(float) #0
-declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
-declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
+declare float @llvm.roundeven.f32(float) nounwind readnone speculatable willreturn
+declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) nounwind readnone speculatable willreturn
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) nounwind readnone speculatable willreturn
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) nounwind readnone speculatable willreturn
-declare double @llvm.roundeven.f64(double) #0
-declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
+declare double @llvm.roundeven.f64(double) nounwind readnone speculatable willreturn
+declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) nounwind readnone speculatable willreturn
-declare half @llvm.fabs.f16(half) #0
-declare float @llvm.fabs.f32(float) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare half @llvm.fabs.f16(half) nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
index 846fbdb33d668d..cdc10c709f21ce 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
@@ -2431,8 +2431,6 @@ define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) {
!0 = !{float 2.500000e+00}
!1 = !{float 1.000000e+00}
-
-attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CI-DAZ-SAFE: {{.*}}
; CI-DAZ-UNSAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d8..71477f834901ea 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -4514,7 +4514,7 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
ret <2 x double> %rsq
}
-define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
+define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) "unsafe-fp-math"="true" {
; SI-SDAG-LABEL: s_rsq_f64_unsafe:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0
@@ -4670,7 +4670,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
ret <2 x i32> %insert.1
}
-define double @v_rsq_f64_unsafe(double %x) #0 {
+define double @v_rsq_f64_unsafe(double %x) "unsafe-fp-math"="true" {
; SI-SDAG-LABEL: v_rsq_f64_unsafe:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -5746,8 +5746,6 @@ define double @v_div_const_contract_sqrt_f64(double %x) {
%rsq = fdiv contract double 256.0, %sqrt
ret double %rsq
}
-
-attributes #0 = { "unsafe-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll b/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
index 30f32e1f3e3369..8b7237e5e8e92b 100644
--- a/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
+++ b/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
@@ -47,6 +47,4 @@ bb:
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
index 0b58b950505244..50293ee63fdc1b 100644
--- a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
@@ -107,14 +107,11 @@ define amdgpu_kernel void @no_s_addk_i32_k0(ptr addrspace(1) %out, i32 %b) {
; SI-LABEL: {{^}}commute_s_addk_i32:
; SI: s_addk_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_s_addk_i32(ptr addrspace(1) %out, i32 %b) #0 {
+define amdgpu_kernel void @commute_s_addk_i32(ptr addrspace(1) %out, i32 %b) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%add = add i32 %size, %b
call void asm sideeffect "; foo $0, $1", "v,s"(ptr addrspace(3) @lds, i32 %add)
ret void
}
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/s_code_end.ll b/llvm/test/CodeGen/AMDGPU/s_code_end.ll
index ad7d8a14babc19..e438817a742542 100644
--- a/llvm/test/CodeGen/AMDGPU/s_code_end.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_code_end.ll
@@ -17,7 +17,7 @@
; GCN-OBJ-NEXT: s_nop 0
-define amdgpu_kernel void @a_kernel1() #0 {
+define amdgpu_kernel void @a_kernel1() "amdgpu-flat-work-group-size"="1,512" {
ret void
}
@@ -28,7 +28,7 @@ define amdgpu_kernel void @a_kernel1() #0 {
; GCN-OBJ: {{^$}}
-define amdgpu_kernel void @a_kernel2() #0 {
+define amdgpu_kernel void @a_kernel2() "amdgpu-flat-work-group-size"="1,512" {
ret void
}
@@ -62,8 +62,6 @@ define amdgpu_kernel void @a_kernel2() #0 {
; GFX90AEND-OBJ: s_nop 0 // 000000000140:
; GFX90AEND-OBJ-COUNT-255: s_nop 0
-define void @a_function() #0 {
+define void @a_function() "amdgpu-flat-work-group-size"="1,512" {
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
index c7987d3d009175..cc4cdf60ac1785 100644
--- a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
@@ -44,14 +44,11 @@ define amdgpu_kernel void @no_s_mulk_i32_k0(ptr addrspace(1) %out, i32 %b) {
; SI-LABEL: {{^}}commute_s_mulk_i32:
; SI: s_mulk_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_s_mulk_i32(ptr addrspace(1) %out, i32 %b) #0 {
+define amdgpu_kernel void @commute_s_mulk_i32(ptr addrspace(1) %out, i32 %b) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%add = mul i32 %size, %b
call void asm sideeffect "; foo $0, $1", "v,s"(ptr addrspace(3) @lds, i32 %add)
ret void
}
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index 714b2af1698fe1..8e6a0358076d1a 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -2,8 +2,8 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI -check-prefix=CI-NOHSA %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() #0
; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
-define amdgpu_kernel void @mubuf(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @mubuf(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -64,7 +64,7 @@ done: ; preds = %loop
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
; GCN-NOHSA: buffer_store_dword [[V_OUT]]
; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
-define amdgpu_kernel void @smrd_valu(ptr addrspace(1) %in, i32 %a, i32 %b, ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @smrd_valu(ptr addrspace(1) %in, i32 %a, i32 %b, ptr addrspace(1) %out) nounwind {
entry:
%tmp = icmp ne i32 %a, 0
br i1 %tmp, label %if, label %else
@@ -92,7 +92,7 @@ endif: ; preds = %else, %if
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu2(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 {
+define amdgpu_kernel void @smrd_valu2(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -112,7 +112,7 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
-define amdgpu_kernel void @smrd_valu_ci_offset(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %c) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i32, ptr addrspace(4) %in, i32 %tmp
@@ -132,7 +132,7 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx2
; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu_ci_offset_x2(ptr addrspace(1) %out, ptr addrspace(4) %in, i64 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x2(ptr addrspace(1) %out, ptr addrspace(4) %in, i64 %c) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr i64, ptr addrspace(4) %in, i32 %tmp
@@ -154,7 +154,7 @@ entry:
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu_ci_offset_x4(ptr addrspace(1) %out, ptr addrspace(4) %in, <4 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x4(ptr addrspace(1) %out, ptr addrspace(4) %in, <4 x i32> %c) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <4 x i32>, ptr addrspace(4) %in, i32 %tmp
@@ -188,7 +188,7 @@ entry:
; GCN-NOHSA: buffer_store_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @smrd_valu_ci_offset_x8(ptr addrspace(1) %out, ptr addrspace(4) %in, <8 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x8(ptr addrspace(1) %out, ptr addrspace(4) %in, <8 x i32> %c) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <8 x i32>, ptr addrspace(4) %in, i32 %tmp
@@ -234,7 +234,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN: s_endpgm
-define amdgpu_kernel void @smrd_valu_ci_offset_x16(ptr addrspace(1) %out, ptr addrspace(4) %in, <16 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x16(ptr addrspace(1) %out, ptr addrspace(4) %in, <16 x i32> %c) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = getelementptr <16 x i32>, ptr addrspace(4) %in, i32 %tmp
@@ -251,7 +251,7 @@ entry:
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
; GCN-NOHSA: buffer_store_dword [[ADD]]
; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
-define amdgpu_kernel void @smrd_valu2_salu_user(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %a) #1 {
+define amdgpu_kernel void @smrd_valu2_salu_user(ptr addrspace(1) %out, ptr addrspace(4) %in, i32 %a) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -265,7 +265,7 @@ entry:
; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu2_max_smrd_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 {
+define amdgpu_kernel void @smrd_valu2_max_smrd_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -279,7 +279,7 @@ entry:
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @smrd_valu2_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) #1 {
+define amdgpu_kernel void @smrd_valu2_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = add i32 %tmp, 4
@@ -294,7 +294,7 @@ entry:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) nounwind {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0
@@ -316,7 +316,7 @@ entry:
; GCN-NOHSA: buffer_store_dword
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v8i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) nounwind {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0
@@ -352,7 +352,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) nounwind {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0
@@ -386,7 +386,7 @@ entry:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
-define amdgpu_kernel void @s_load_imm_v16i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32_salu_user(ptr addrspace(1) %out, ptr addrspace(4) nocapture readonly %in) nounwind {
entry:
%tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = getelementptr inbounds i32, ptr addrspace(4) %in, i32 %tmp0
@@ -501,6 +501,3 @@ loop:
exit:
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
index 1b2dbec2b95c60..b92fff9dd3ea21 100644
--- a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
@@ -7,7 +7,7 @@
; EG-NEXT: 0(
define amdgpu_kernel void @test_0(i32 %in0, ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -19,7 +19,7 @@ entry:
; EG-NEXT: 1(
define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -31,15 +31,13 @@ entry:
; EG-NEXT: 2(
define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, ptr addrspace(1) %out) {
entry:
- %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) readnone
store i32 %0, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) #0
-
-attributes #0 = { readnone }
+declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone
!opencl.kernels = !{!0, !1, !2}
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
index baee88b69d0602..8e6a8b4c3bb1e4 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
@@ -4,7 +4,7 @@
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=GFX908 %s
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A %s
-define amdgpu_kernel void @scalar_to_vector_v8i16(<2 x i32> %in, ptr %out) #0 {
+define amdgpu_kernel void @scalar_to_vector_v8i16(<2 x i32> %in, ptr %out) nounwind {
; GFX900-LABEL: scalar_to_vector_v8i16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -74,7 +74,7 @@ entry:
%val.6.vec8.i16 = shufflevector <8 x i16> %val.5.vec8.i16, <8 x i16> %val.3.vec8.i16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds <8 x i16>, ptr %out, i64 %tid.ext
store <8 x i16> %val.6.vec8.i16, ptr %out.gep, align 16
@@ -82,7 +82,7 @@ entry:
ret void
}
-define amdgpu_kernel void @scalar_to_vector_v8f16(<2 x float> %in, ptr %out) #0 {
+define amdgpu_kernel void @scalar_to_vector_v8f16(<2 x float> %in, ptr %out) nounwind {
; GFX900-LABEL: scalar_to_vector_v8f16:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -152,7 +152,7 @@ entry:
%val.6.vec8.half = shufflevector <8 x half> %val.5.vec8.half, <8 x half> %val.3.vec8.half, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds <8 x half>, ptr %out, i64 %tid.ext
store <8 x half> %val.6.vec8.half, ptr %out.gep, align 16
@@ -160,7 +160,4 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
index 5f291489848fe6..6304ae10c00abc 100644
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -5,7 +5,7 @@
; while trying to spill SGPRs to memory. After we enabled SGPR spills into virtual VGPRs
; the edge case won't arise and the test would always compile.
-define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
+define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="10,10" {
; CHECK-LABEL: kernel0:
; CHECK: ; %bb.0:
; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
@@ -376,61 +376,58 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ; kill: killed $vgpr23
; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "", "~{v[8:15]}" () #0
- call void asm sideeffect "", "~{v[16:19]}"() #0
- call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "", "~{v[8:15]}" () nounwind
+ call void asm sideeffect "", "~{v[16:19]}"() nounwind
+ call void asm sideeffect "", "~{v[20:21]}"() nounwind
+ call void asm sideeffect "", "~{v22}"() nounwind
- %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) #0
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) nounwind
br label %ret
ret:
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index c0d199920bd94e..e659c2db955fbb 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -11,7 +11,7 @@
@sched_dbg_value_crash.tmp6 = internal unnamed_addr addrspace(3) global [256 x [16 x i8]] undef, align 16
- define amdgpu_kernel void @sched_dbg_value_crash(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture readonly %arg1, ptr addrspace(1) nocapture readonly %arg2, ptr addrspace(1) nocapture readonly %arg3, ptr addrspace(1) nocapture %arg4) local_unnamed_addr #2 {
+ define amdgpu_kernel void @sched_dbg_value_crash(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture readonly %arg1, ptr addrspace(1) nocapture readonly %arg2, ptr addrspace(1) nocapture readonly %arg3, ptr addrspace(1) nocapture %arg4) local_unnamed_addr convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" {
bb:
%0 = getelementptr i32, ptr addrspace(1) %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp5 = alloca %struct.wombat, align 16, addrspace(5)
@@ -33,7 +33,7 @@
%16 = add i32 %15, %11
%17 = getelementptr inbounds [256 x [16 x i8]], ptr addrspace(3) @sched_dbg_value_crash.tmp6, i32 0, i32 %16
%tmp7 = load i64, ptr addrspace(4) null, align 536870912
- %tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4
+ %tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind, !range !4
%tmp9 = zext i32 %tmp8 to i64
%tmp10 = add i64 %tmp7, %tmp9
%tmp11 = shl i64 %tmp10, 32
@@ -49,7 +49,7 @@
%tmp21 = getelementptr inbounds i8, ptr addrspace(1) %arg, i64 %tmp20
%tmp22 = bitcast ptr addrspace(1) %tmp21 to ptr addrspace(1)
%tmp23 = bitcast ptr addrspace(5) %tmp5 to ptr addrspace(5)
- call void @llvm.lifetime.start.p5(i64 144, ptr addrspace(5) nonnull %tmp23) #3
+ call void @llvm.lifetime.start.p5(i64 144, ptr addrspace(5) nonnull %tmp23) nounwind
%tmp24 = getelementptr inbounds %struct.wombat, ptr addrspace(5) %tmp5, i32 0, i32 6
%tmp25 = getelementptr i32, ptr addrspace(1) %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp26 = load i32, ptr addrspace(1) %tmp25, align 4
@@ -115,7 +115,7 @@
%tmp85 = fadd fast float %tmp84, undef
%tmp86 = fmul float %tmp82, %tmp82
%tmp87 = fdiv float 1.000000e+00, %tmp86
- tail call void @llvm.dbg.value(metadata float %tmp87, metadata !5, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !8
+ tail call void @llvm.dbg.value(metadata float %tmp87, metadata !5, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) nounwind, !dbg !8
%tmp88 = fmul float %tmp82, 0.000000e+00
%tmp89 = fsub fast float %tmp85, %tmp88
%tmp90 = fdiv float %tmp89, %tmp86
@@ -129,31 +129,26 @@
%extractelement = extractelement <2 x float> %fadd, i64 1
%tmp96 = fsub float %extractelement, %tmp95
%tmp97 = getelementptr inbounds %struct.wombat, ptr addrspace(5) %tmp5, i32 0, i32 8, i32 1
- call void @func(float %tmp96, i64 0, ptr addrspace(5) nonnull %tmp97) #3
+ call void @func(float %tmp96, i64 0, ptr addrspace(5) nonnull %tmp97) nounwind
%tmp984 = bitcast ptr addrspace(3) %17 to ptr addrspace(3)
%tmp99 = getelementptr inbounds %struct.snork, ptr addrspace(1) %arg4, i64 %tmp12, i32 8, i32 1, i64 0
call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) %tmp99, ptr addrspace(3) %tmp984, i64 16, i32 16, i1 false)
- call void @llvm.lifetime.end.p5(i64 144, ptr addrspace(5) nonnull %tmp23) #3
+ call void @llvm.lifetime.end.p5(i64 144, ptr addrspace(5) nonnull %tmp23) nounwind
ret void
}
declare void @func(float, i64, ptr addrspace(5))
- declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #0
- declare float @llvm.fmuladd.f32(float, float, float) #1
- declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #0
- declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
- declare i32 @llvm.amdgcn.workitem.id.x() #1
- declare void @llvm.dbg.value(metadata, metadata, metadata) #1
- declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
- declare i32 @llvm.amdgcn.workitem.id.y() #1
- declare i32 @llvm.amdgcn.workitem.id.z() #1
- declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i32, i1) #0
- declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i32, i1) #0
-
- attributes #0 = { argmemonly nounwind }
- attributes #1 = { nounwind readnone speculatable }
- attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" }
- attributes #3 = { nounwind }
+ declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
+ declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
+ declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) argmemonly nounwind
+ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone speculatable
+ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
+ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable
+ declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
+ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
+ declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable
+ declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i32, i1) argmemonly nounwind
+ declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i32, i1) argmemonly nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}
diff --git a/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll b/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
index c20dbba42ccd4e..28eeee898286c7 100644
--- a/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
@@ -7,105 +7,102 @@
; Inverted 1008: 01111110000
; GCN: After Inverting, SchedGroup Mask: 1008
-define amdgpu_kernel void @invert1() #0 {
+define amdgpu_kernel void @invert1() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 1) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 1) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 2044: 11111111100
; GCN: After Inverting, SchedGroup Mask: 2044
-define amdgpu_kernel void @invert2() #0 {
+define amdgpu_kernel void @invert2() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 2) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 2) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 2042: 11111111010
; GCN: After Inverting, SchedGroup Mask: 2042
-define amdgpu_kernel void @invert4() #0 {
+define amdgpu_kernel void @invert4() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 4) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 4) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 2038: 11111110110
; GCN: After Inverting, SchedGroup Mask: 2038
-define amdgpu_kernel void @invert8() #0 {
+define amdgpu_kernel void @invert8() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 8) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 8) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1935: 11110001111
; GCN: After Inverting, SchedGroup Mask: 1935
-define amdgpu_kernel void @invert16() #0 {
+define amdgpu_kernel void @invert16() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 16) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 16) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1999: 11111001111
; GCN: After Inverting, SchedGroup Mask: 1999
-define amdgpu_kernel void @invert32() #0 {
+define amdgpu_kernel void @invert32() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 32) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 32) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1967: 11110101111
; GCN: After Inverting, SchedGroup Mask: 1967
-define amdgpu_kernel void @invert64() #0 {
+define amdgpu_kernel void @invert64() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 64) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 64) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1151: 10001111111
; GCN: After Inverting, SchedGroup Mask: 1151
-define amdgpu_kernel void @invert128() #0 {
+define amdgpu_kernel void @invert128() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 128) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 128) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1663: 11001111111
; GCN: After Inverting, SchedGroup Mask: 1663
-define amdgpu_kernel void @invert256() #0 {
+define amdgpu_kernel void @invert256() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 256) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 256) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1407: 10101111111
; GCN: After Inverting, SchedGroup Mask: 1407
-define amdgpu_kernel void @invert512() #0 {
+define amdgpu_kernel void @invert512() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 512) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 512) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
; Inverted 1022: 01111111110
; GCN: After Inverting, SchedGroup Mask: 1022
-define amdgpu_kernel void @invert1024() #0 {
+define amdgpu_kernel void @invert1024() nounwind {
entry:
- call void @llvm.amdgcn.sched.barrier(i32 1024) #1
- call void @llvm.amdcn.s.nop(i16 0) #1
+ call void @llvm.amdgcn.sched.barrier(i32 1024) convergent nounwind
+ call void @llvm.amdcn.s.nop(i16 0) convergent nounwind
ret void
}
-declare void @llvm.amdgcn.sched.barrier(i32) #1
-declare void @llvm.amdcn.s.nop(i16) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.sched.barrier(i32) convergent nounwind
+declare void @llvm.amdcn.s.nop(i16) convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
index ae779eb3410ffb..52c0bf2a419967 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s
; REQUIRES: asserts
-define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) nounwind {
main_body:
%tmp = extractelement <4 x float> %reg1, i32 0
%tmp5 = extractelement <4 x float> %reg1, i32 1
@@ -76,10 +76,7 @@ ELSE17: ; preds = %ELSE
br label %ENDIF
}
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
index 57d3c7fc8bd019..4c3153297e6b68 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s
; REQUIRES: asserts
-define amdgpu_kernel void @main() #0 {
+define amdgpu_kernel void @main() nounwind {
main_body:
%tmp = load <4 x float>, ptr addrspace(9) null
%tmp5 = extractelement <4 x float> %tmp, i32 3
@@ -83,9 +83,6 @@ ENDIF30: ; preds = %LOOP29
br label %LOOP29
}
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) nounwind
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
index 9f0b2119d0f4e4..a5ea080c7e3cad 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s
; REQUIRES: asserts
-define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) nounwind {
main_body:
%tmp = extractelement <4 x float> %reg1, i32 0
%tmp5 = extractelement <4 x float> %reg1, i32 1
@@ -76,9 +76,6 @@ ELSE17: ; preds = %ELSE
br label %ENDIF
}
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll b/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
index c9857371b855da..54057097eeb9ee 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-global-loads.ll
@@ -10,7 +10,7 @@
; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
; SI: buffer_store_dword [[REG0]]
; SI: buffer_store_dword [[REG1]]
-define amdgpu_kernel void @cluster_global_arg_loads(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @cluster_global_arg_loads(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr) nounwind {
%load0 = load i32, ptr addrspace(1) %ptr, align 4
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 2
%load1 = load i32, ptr addrspace(1) %gep, align 4
@@ -33,6 +33,3 @@ entry:
store i32 %tmp2, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll b/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
index 1888df188ac075..985d00bd4ea844 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
@@ -87,8 +87,6 @@ IF23: ; preds = %ELSE
br label %ENDIF
}
-declare float @fabsf(float) #0
+declare float @fabsf(float) readonly
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #0 = { readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll b/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
index 11602b1d353f91..c1f526ac83d7b5 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
@@ -4,7 +4,7 @@
; CHECK: NumVgprs: {{[0-9][0-9][0-9]$}}
-define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) #0 {
+define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
bb:
%tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
%tmp2 = load float, ptr addrspace(3) %tmp, align 4
@@ -585,7 +585,4 @@ bb:
}
; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
-attributes #1 = { nounwind readnone }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
index bc37c994d226b3..821e4f54141230 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -1,11 +1,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -start-before=machine-scheduler -stop-after=greedy,1 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
--- |
- define amdgpu_kernel void @no_sched_metric_due_to_spills() #0 {
+ define amdgpu_kernel void @no_sched_metric_due_to_spills() "amdgpu-flat-work-group-size"="1,256" {
ret void
}
-
- attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
...
# GCN-LABEL: name: no_sched_metric_due_to_spills
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
index bd1258cb1cf980..85054280147639 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
@@ -15,7 +15,7 @@
; GCN: NumVgprs: {{[0-9]$}}
; GCN: ScratchSize: 0{{$}}
-define amdgpu_kernel void @load_store_max_9vgprs(ptr addrspace(1) nocapture noalias readonly %arg, ptr addrspace(1) nocapture noalias %arg1, i1 %cnd) #1 {
+define amdgpu_kernel void @load_store_max_9vgprs(ptr addrspace(1) nocapture noalias readonly %arg, ptr addrspace(1) nocapture noalias %arg1, i1 %cnd) "amdgpu-num-vgpr"="9" {
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%base = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %id
@@ -39,7 +39,4 @@ bb2:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { "amdgpu-num-vgpr"="9" }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
index 3ba80380091bb8..36340df82cb7b0 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
@@ -586,6 +586,4 @@ bb:
}
; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
-attributes #0 = { nounwind readnone }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
index 2c9d24ee04ebf2..bd993f4c9e3137 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
@@ -286,5 +286,4 @@ bb:
store float %res.29, ptr addrspace(1) %adr.res.29, align 4
ret void
}
-declare float @llvm.fmuladd.f32(float, float, float) #0
-attributes #0 = { nounwind readnone }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
index 96b40bca5e2e3a..2eb4f7b09ceb94 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
@@ -13,7 +13,7 @@
;
; MISCHED: NumVgprs: {{[7-9][0-9]$}}
-define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) #1 {
+define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
bb:
%tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
%tmp2 = load float, ptr addrspace(3) %tmp, align 4
@@ -594,7 +594,4 @@ bb:
}
; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
index 26f9ba4c278083..7a6cf1d6977ad7 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
@@ -8,7 +8,7 @@
; CHECK-NOT: REG-CRIT
; CHECK-NOT: REG-EXCESS
-define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) #1 {
+define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(3) nocapture %arg1) "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" {
bb:
%tmp0 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
%tmp1 = load float, ptr addrspace(3) %tmp0, align 4
@@ -104,7 +104,4 @@ bb:
}
; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll b/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
index 94815558bf3d6d..1aa55e27152acb 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
@@ -12,7 +12,7 @@
; OCC: Occupancy: 8
; RELAX: Occupancy: 4
-define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) #1 {
+define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) "amdgpu-wave-limiter"="true" {
bb:
%tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
%tmp2 = load float, ptr addrspace(3) %tmp, align 4
@@ -593,7 +593,4 @@ bb:
}
; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { "amdgpu-wave-limiter"="true" }
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
index a7e361b4b67cc2..908518216c22f0 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
@@ -6,7 +6,7 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <3
; CHECK: CritRes: {{[0-9]+}} HWXDL
; CHECK: Picking: Cand SU([[nid:[0-9]+]]) RES-DEMAND
; CHECK: Scheduling SU([[nid]]) {{.*}} V_MFMA_F32_32X32X4F16
-define amdgpu_kernel void @schedule-xdl-resource(ptr addrspace(1) %in, ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %stride) #0 {
+define amdgpu_kernel void @schedule-xdl-resource(ptr addrspace(1) %in, ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %stride) nounwind "amdgpu-waves-per-eu"="1,1" {
%in_ptr.1 = getelementptr <32 x float>, ptr addrspace(1) %in, i32 %stride
%in_ptr.2 = getelementptr <32 x float>, ptr addrspace(1) %in_ptr.1, i32 %stride
%in_ptr.3 = getelementptr <32 x float>, ptr addrspace(1) %in_ptr.2, i32 %stride
@@ -40,5 +40,3 @@ define amdgpu_kernel void @schedule-xdl-resource(ptr addrspace(1) %in, ptr addrs
ret void
}
-
-attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
index 1c912d09c47d18..fc653d1a313e31 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
@@ -13,7 +13,7 @@
target triple = "amdgcn--"
-define amdgpu_gs void @main(i32 inreg %arg) #0 {
+define amdgpu_gs void @main(i32 inreg %arg) nounwind "target-cpu"="tonga" {
main_body:
%tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 20, i32 0)
%tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 24, i32 0)
@@ -45,11 +45,6 @@ main_body:
ret void
}
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #1
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #3
-
-attributes #0 = { nounwind "target-cpu"="tonga" }
-attributes #1 = { nounwind readnone willreturn }
-attributes #2 = { nounwind readonly willreturn }
-attributes #3 = { nounwind willreturn writeonly }
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg) nounwind readonly willreturn
+declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index c9dbadcbd23157..42536039a9a16a 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -4,7 +4,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDWA,GFX9_10,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-sdwa-peephole -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDWA,GFX9_10,GFX10 %s
-define amdgpu_kernel void @add_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @add_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: add_shr_i32:
; NOSDWA: ; %bb.0:
; NOSDWA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -62,7 +62,7 @@ define amdgpu_kernel void @add_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @sub_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @sub_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: sub_shr_i32:
; NOSDWA: ; %bb.0:
; NOSDWA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -120,7 +120,7 @@ define amdgpu_kernel void @sub_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @mul_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @mul_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_shr_i32:
; NOSDWA: ; %bb.0:
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -207,7 +207,7 @@ define amdgpu_kernel void @mul_shr_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @mul_i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -290,7 +290,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v2i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -380,7 +380,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v4i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -481,7 +481,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v8i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -604,7 +604,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_half:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -676,7 +676,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v2half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v2half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v2half:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -756,7 +756,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v4half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v4half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v4half:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -847,7 +847,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v8half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v8half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v8half:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -960,7 +960,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_i8:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1039,7 +1039,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v2i8:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1139,7 +1139,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v4i8:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1268,7 +1268,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mul_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mul_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mul_v8i8:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1508,7 +1508,7 @@ define amdgpu_kernel void @sitofp_v2i16_to_v2f16(
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
ptr addrspace(1) %r,
- ptr addrspace(1) %a) #0 {
+ ptr addrspace(1) %a) "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x i16>, ptr addrspace(1) %a
%r.val = sitofp <2 x i16> %a.val to <2 x half>
@@ -1516,7 +1516,7 @@ entry:
ret void
}
-define amdgpu_kernel void @mac_v2half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mac_v2half(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mac_v2half:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1602,7 +1602,7 @@ entry:
ret void
}
-define amdgpu_kernel void @immediate_mul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @immediate_mul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: immediate_mul_v2i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1676,7 +1676,7 @@ entry:
}
; Double use of same src - should not convert it
-define amdgpu_kernel void @mulmul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @mulmul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: mulmul_v2i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1776,7 +1776,7 @@ entry:
ret void
}
-define amdgpu_kernel void @add_bb_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 {
+define amdgpu_kernel void @add_bb_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: add_bb_v2i16:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1862,7 +1862,7 @@ store_label:
}
; Check that "pulling out" SDWA operands works correctly.
-define amdgpu_kernel void @pulled_out_test(ptr addrspace(1) %sourceA, ptr addrspace(1) %destValues) #0 {
+define amdgpu_kernel void @pulled_out_test(ptr addrspace(1) %sourceA, ptr addrspace(1) %destValues) "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: pulled_out_test:
; NOSDWA: ; %bb.0: ; %entry
; NOSDWA-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -2012,7 +2012,7 @@ entry:
}
; TODO: Why is the constant not peepholed into the v_or_b32_e32?
-define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
+define amdgpu_kernel void @sdwa_crash_inlineasm_def() "denormal-fp-math"="preserve-sign,preserve-sign" {
; NOSDWA-LABEL: sdwa_crash_inlineasm_def:
; NOSDWA: ; %bb.0: ; %bb
; NOSDWA-NEXT: s_mov_b32 s0, 0xffff
@@ -2206,8 +2206,6 @@ bb2:
}
declare i32 @llvm.amdgcn.workitem.id.x()
-
-attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX9_10: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
index c94bf01fa460a4..d97f57ebe8511c 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
@@ -37,8 +37,5 @@ define amdgpu_kernel void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
ret void
}
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
index 7c1da18de70f83..0307098ca56022 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
@@ -1771,8 +1771,6 @@ define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
ret half %select
}
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.fma.f16(half, half, half) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare half @llvm.fabs.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fma.f16(half, half, half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare half @llvm.fmuladd.f16(half, half, half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
index ae3da60ef6c93f..3bd480ee433373 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
@@ -18,6 +18,4 @@ define half @select_fneg_posk_src_rcp_f16(i32 %c, half %x, half %y) {
ret half %select
}
-declare half @llvm.amdgcn.rcp.f16(half) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare half @llvm.amdgcn.rcp.f16(half) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index b18e34941d4fea..408d5ce2f86f9c 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -8,7 +8,7 @@
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
-define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -30,7 +30,7 @@ define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
-define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -57,7 +57,7 @@ define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
-define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -80,7 +80,7 @@ define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c)
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
-define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -103,7 +103,7 @@ define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -121,7 +121,7 @@ define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -138,7 +138,7 @@ define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
-define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -153,7 +153,7 @@ define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
-define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float 2.0, float 1.0
@@ -170,7 +170,7 @@ define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -190,7 +190,7 @@ define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -207,7 +207,7 @@ define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
@@ -227,7 +227,7 @@ define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -245,7 +245,7 @@ define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -267,7 +267,7 @@ define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
-define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -294,7 +294,7 @@ define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
-define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -317,7 +317,7 @@ define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c)
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
-define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -340,7 +340,7 @@ define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -358,7 +358,7 @@ define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -376,7 +376,7 @@ define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -396,7 +396,7 @@ define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
; GCN: s_cmp_eq_u32
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
-define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -430,7 +430,7 @@ define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
; GCN: s_cmp_eq_u32
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
-define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2048.0, float -4096.0
@@ -444,7 +444,7 @@ define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
-define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, float -2.0, float -1.0
@@ -462,7 +462,7 @@ define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 vcc, -1, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -479,7 +479,7 @@ define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -498,7 +498,7 @@ define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 vcc, -1, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
-define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -516,7 +516,7 @@ define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -537,7 +537,7 @@ define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -558,7 +558,7 @@ define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -578,7 +578,7 @@ define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
-define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -598,7 +598,7 @@ define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -619,7 +619,7 @@ define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
-define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
+define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
@@ -641,7 +641,7 @@ define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -661,7 +661,7 @@ define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -679,7 +679,7 @@ define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -699,7 +699,7 @@ define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -722,7 +722,7 @@ define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -739,7 +739,7 @@ define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%add = fsub float %x, 4.0
@@ -755,7 +755,7 @@ define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
%mul = fmul float %x, 4.0
@@ -772,7 +772,7 @@ define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -789,7 +789,7 @@ define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%z = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -807,7 +807,7 @@ define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
-define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
+define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -831,7 +831,7 @@ define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -858,7 +858,7 @@ define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -876,7 +876,7 @@ define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -897,7 +897,7 @@ define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
; GCN: s_cselect_b64 s[0:1], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s[0:1]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -916,7 +916,7 @@ define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -937,7 +937,7 @@ define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -956,7 +956,7 @@ define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -977,7 +977,7 @@ define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
; GCN: s_cselect_b64 s[0:1], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s[0:1]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
-define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
+define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) nounwind {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
@@ -990,12 +990,9 @@ define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rcp.legacy(float) nounwind readnone
+declare float @llvm.amdgcn.fmul.legacy(float, float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index d2bb971b680307..5b66fed0f4c2f6 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -3364,8 +3364,6 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
ret <2 x half> %select
}
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll
index 9ef384fb730519..9e1baa7159113e 100644
--- a/llvm/test/CodeGen/AMDGPU/select-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll
@@ -14,7 +14,7 @@
; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @opt_select_i32_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) nounwind {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%and = and i1 %icmp0, %icmp1
@@ -31,7 +31,7 @@ define amdgpu_kernel void @opt_select_i32_and_cmp_i32(ptr addrspace(1) %out, i32
; GCN: s_cselect_b32 [[RESULT:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @opt_select_i32_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) nounwind {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%and = and i1 %fcmp0, %fcmp1
@@ -52,7 +52,7 @@ define amdgpu_kernel void @opt_select_i32_and_cmp_f32(ptr addrspace(1) %out, flo
; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
-define amdgpu_kernel void @opt_select_i64_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_and_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) nounwind {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%and = and i1 %icmp0, %icmp1
@@ -71,7 +71,7 @@ define amdgpu_kernel void @opt_select_i64_and_cmp_i32(ptr addrspace(1) %out, i32
; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
-define amdgpu_kernel void @opt_select_i64_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_and_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) nounwind {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%and = and i1 %fcmp0, %fcmp1
@@ -91,7 +91,7 @@ define amdgpu_kernel void @opt_select_i64_and_cmp_f32(ptr addrspace(1) %out, flo
; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_dword [[VRESULT]]
; GCN: s_endpgm
-define amdgpu_kernel void @opt_select_i32_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %x, i32 %y) nounwind {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%or = or i1 %icmp0, %icmp1
@@ -108,7 +108,7 @@ define amdgpu_kernel void @opt_select_i32_or_cmp_i32(ptr addrspace(1) %out, i32
; GCN-DAG: s_cselect_b32 [[RESULT:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; GCN: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @opt_select_i32_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @opt_select_i32_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %x, i32 %y) nounwind {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%or = or i1 %fcmp0, %fcmp1
@@ -129,7 +129,7 @@ define amdgpu_kernel void @opt_select_i32_or_cmp_f32(ptr addrspace(1) %out, floa
; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
-define amdgpu_kernel void @opt_select_i64_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_or_cmp_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i64 %x, i64 %y) nounwind {
%icmp0 = icmp ne i32 %a, %b
%icmp1 = icmp ne i32 %a, %c
%or = or i1 %icmp0, %icmp1
@@ -148,7 +148,7 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_i32(ptr addrspace(1) %out, i32
; GCN-DAG: v_mov_b32_e32 v[[VRESULT1:[0-9]+]], [[RESULT0]]
; GCN-DAG: v_mov_b32_e32 v[[VRESULT0:[0-9]+]], [[RESULT1]]
; GCN: buffer_store_dwordx2 v[[[VRESULT0]]:[[VRESULT1]]]
-define amdgpu_kernel void @opt_select_i64_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) #0 {
+define amdgpu_kernel void @opt_select_i64_or_cmp_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i64 %x, i64 %y) nounwind {
%fcmp0 = fcmp one float %a, %b
%fcmp1 = fcmp one float %a, %c
%or = or i1 %fcmp0, %fcmp1
@@ -160,7 +160,7 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(ptr addrspace(1) %out, floa
; GCN-LABEL: {{^}}regression:
; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-define amdgpu_kernel void @regression(ptr addrspace(1) %out, float %c0, float %c1) #0 {
+define amdgpu_kernel void @regression(ptr addrspace(1) %out, float %c0, float %c1) nounwind {
entry:
%cmp0 = fcmp oeq float %c0, 1.0
br i1 %cmp0, label %if0, label %endif
@@ -179,5 +179,3 @@ endif:
store float %tmp2, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
index cca44548bb8f8b..e78504614b7caa 100644
--- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
@@ -16,7 +16,7 @@
; SelectionDAGBuilder for some reason changes the select type.
; VI: v_cndmask_b32
; VI: v_cndmask_b32
-define amdgpu_kernel void @v_select_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v2i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <2 x i8>, ptr addrspace(1) %a.ptr, align 2
%b = load <2 x i8>, ptr addrspace(1) %b.ptr, align 2
%cmp = icmp eq i32 %c, 0
@@ -28,7 +28,7 @@ define amdgpu_kernel void @v_select_v2i8(ptr addrspace(1) %out, ptr addrspace(1)
; GCN-LABEL: {{^}}v_select_v4i8:
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <4 x i8>, ptr addrspace(1) %a.ptr
%b = load <4 x i8>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -41,7 +41,7 @@ define amdgpu_kernel void @v_select_v4i8(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v8i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <8 x i8>, ptr addrspace(1) %a.ptr
%b = load <8 x i8>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -56,7 +56,7 @@ define amdgpu_kernel void @v_select_v8i8(ptr addrspace(1) %out, ptr addrspace(1)
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <16 x i8>, ptr addrspace(1) %a.ptr
%b = load <16 x i8>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -71,7 +71,7 @@ define amdgpu_kernel void @v_select_v16i8(ptr addrspace(1) %out, ptr addrspace(1
; SI: s_cselect_b32
; SI-NOT: cndmask
-define amdgpu_kernel void @select_v4i8(ptr addrspace(1) %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
+define amdgpu_kernel void @select_v4i8(ptr addrspace(1) %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
%cmp = icmp eq i8 %c, 0
%select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
store <4 x i8> %select, ptr addrspace(1) %out, align 4
@@ -85,7 +85,7 @@ define amdgpu_kernel void @select_v4i8(ptr addrspace(1) %out, <4 x i8> %a, <4 x
; SI: s_cselect_b32
; SI-NOT: v_cndmask_b32e
-define amdgpu_kernel void @select_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i16> %a, <2 x i16> %b
store <2 x i16> %select, ptr addrspace(1) %out, align 4
@@ -97,7 +97,7 @@ define amdgpu_kernel void @select_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2
; GCN: buffer_load_dword v
; GCN: v_cndmask_b32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <2 x i16>, ptr addrspace(1) %a.ptr
%b = load <2 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -115,7 +115,7 @@ define amdgpu_kernel void @v_select_v2i16(ptr addrspace(1) %out, ptr addrspace(1
; VI: s_cselect_b32
; GFX9: cndmask
; GFX9: cndmask
-define amdgpu_kernel void @v_select_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <3 x i16>, ptr addrspace(1) %a.ptr
%b = load <3 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -128,7 +128,7 @@ define amdgpu_kernel void @v_select_v3i16(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <4 x i16>, ptr addrspace(1) %a.ptr
%b = load <4 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -143,7 +143,7 @@ define amdgpu_kernel void @v_select_v4i16(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v8i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <8 x i16>, ptr addrspace(1) %a.ptr
%b = load <8 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -162,7 +162,7 @@ define amdgpu_kernel void @v_select_v8i16(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v16i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <16 x i16>, ptr addrspace(1) %a.ptr
%b = load <16 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -189,7 +189,7 @@ define amdgpu_kernel void @v_select_v16i16(ptr addrspace(1) %out, ptr addrspace(
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v32i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <32 x i16>, ptr addrspace(1) %a.ptr
%b = load <32 x i16>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -205,7 +205,7 @@ define amdgpu_kernel void @v_select_v32i16(ptr addrspace(1) %out, ptr addrspace(
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @s_select_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
store <2 x i32> %select, ptr addrspace(1) %out, align 8
@@ -218,7 +218,7 @@ define amdgpu_kernel void @s_select_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @s_select_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %select, ptr addrspace(1) %out, align 16
@@ -234,7 +234,7 @@ define amdgpu_kernel void @s_select_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @v_select_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) #0 {
+define amdgpu_kernel void @v_select_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) nounwind {
bb:
%tmp2 = icmp ult i32 %cond, 32
%val = load <4 x i32>, ptr addrspace(1) %in
@@ -252,7 +252,7 @@ bb:
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: s_cselect_b32
-define amdgpu_kernel void @select_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
store <8 x i32> %select, ptr addrspace(1) %out, align 16
@@ -264,7 +264,7 @@ define amdgpu_kernel void @select_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8
; GCN-DAG: s_cselect_b32
; GCN-DAG: s_cselect_b32
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @s_select_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x float> %a, <2 x float> %b
store <2 x float> %select, ptr addrspace(1) %out, align 16
@@ -279,7 +279,7 @@ define amdgpu_kernel void @s_select_v2f32(ptr addrspace(1) %out, <2 x float> %a,
; GCN: s_cselect_b32
; GCN: buffer_store_dwordx
-define amdgpu_kernel void @s_select_v3f32(ptr addrspace(1) %out, <3 x float> %a, <3 x float> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v3f32(ptr addrspace(1) %out, <3 x float> %a, <3 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <3 x float> %a, <3 x float> %b
store <3 x float> %select, ptr addrspace(1) %out, align 16
@@ -296,7 +296,7 @@ define amdgpu_kernel void @s_select_v3f32(ptr addrspace(1) %out, <3 x float> %a,
; GCN: s_cselect_b32
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @s_select_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x float> %a, <4 x float> %b
store <4 x float> %select, ptr addrspace(1) %out, align 16
@@ -312,7 +312,7 @@ define amdgpu_kernel void @s_select_v4f32(ptr addrspace(1) %out, <4 x float> %a,
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; GCN: buffer_store_dwordx4
-define amdgpu_kernel void @v_select_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) #0 {
+define amdgpu_kernel void @v_select_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %cond) nounwind {
bb:
%tmp2 = icmp ult i32 %cond, 32
%val = load <4 x float>, ptr addrspace(1) %in
@@ -331,7 +331,7 @@ bb:
; GCN: s_cselect_b32
; GCN: buffer_store_dwordx
-define amdgpu_kernel void @s_select_v5f32(ptr addrspace(1) %out, <5 x float> %a, <5 x float> %b, i32 %c) #0 {
+define amdgpu_kernel void @s_select_v5f32(ptr addrspace(1) %out, <5 x float> %a, <5 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <5 x float> %a, <5 x float> %b
store <5 x float> %select, ptr addrspace(1) %out, align 16
@@ -347,7 +347,7 @@ define amdgpu_kernel void @s_select_v5f32(ptr addrspace(1) %out, <5 x float> %a,
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
-define amdgpu_kernel void @select_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x float> %a, <8 x float> %b
store <8 x float> %select, ptr addrspace(1) %out, align 16
@@ -359,7 +359,7 @@ define amdgpu_kernel void @select_v8f32(ptr addrspace(1) %out, <8 x float> %a, <
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: s_cselect_b32
-define amdgpu_kernel void @select_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <2 x double> %a, <2 x double> %b
store <2 x double> %select, ptr addrspace(1) %out, align 16
@@ -375,7 +375,7 @@ define amdgpu_kernel void @select_v2f64(ptr addrspace(1) %out, <2 x double> %a,
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: s_cselect_b32
-define amdgpu_kernel void @select_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <4 x double> %a, <4 x double> %b
store <4 x double> %select, ptr addrspace(1) %out, align 16
@@ -399,7 +399,7 @@ define amdgpu_kernel void @select_v4f64(ptr addrspace(1) %out, <4 x double> %a,
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; GCN: s_cselect_b32
-define amdgpu_kernel void @select_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b, i32 %c) #0 {
+define amdgpu_kernel void @select_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
%cmp = icmp eq i32 %c, 0
%select = select i1 %cmp, <8 x double> %a, <8 x double> %b
store <8 x double> %select, ptr addrspace(1) %out, align 16
@@ -409,7 +409,7 @@ define amdgpu_kernel void @select_v8f64(ptr addrspace(1) %out, <8 x double> %a,
; GCN-LABEL: {{^}}v_select_v2f16:
; GCN: v_cndmask_b32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <2 x half>, ptr addrspace(1) %a.ptr
%b = load <2 x half>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -422,7 +422,7 @@ define amdgpu_kernel void @v_select_v2f16(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v3f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v3f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <3 x half>, ptr addrspace(1) %a.ptr
%b = load <3 x half>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -435,7 +435,7 @@ define amdgpu_kernel void @v_select_v3f16(ptr addrspace(1) %out, ptr addrspace(1
; GCN: v_cndmask_b32_e32
; GCN: v_cndmask_b32_e32
; GCN-NOT: cndmask
-define amdgpu_kernel void @v_select_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) #0 {
+define amdgpu_kernel void @v_select_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, i32 %c) nounwind {
%a = load <4 x half>, ptr addrspace(1) %a.ptr
%b = load <4 x half>, ptr addrspace(1) %b.ptr
%cmp = icmp eq i32 %c, 0
@@ -445,7 +445,4 @@ define amdgpu_kernel void @v_select_v4f16(ptr addrspace(1) %out, ptr addrspace(1
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
index 81858bd3d29ee0..b0c96af09c8835 100644
--- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
@@ -128,12 +128,8 @@ define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrs
ret void
}
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32 immarg, i32 immarg) #2
-
-attributes #0 = { convergent nounwind readnone willreturn }
-attributes #1 = { convergent nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind willreturn writeonly }
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) convergent nounwind readnone willreturn
+declare i32 @llvm.amdgcn.strict.wwm.i32(i32) convergent nounwind readnone speculatable willreturn
+declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-fneg-constant.ll b/llvm/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
index 74cbd0ffb2677d..b87250f5360700 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
@@ -10,7 +10,7 @@
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL]]
; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @multi_use_fneg_src() #0 {
+define amdgpu_kernel void @multi_use_fneg_src() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%b = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
@@ -33,7 +33,7 @@ define amdgpu_kernel void @multi_use_fneg_src() #0 {
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[A]]
; GCN: v_mul_f32_e64 [[USE1:v[0-9]+]], [[MUL]], -[[MUL]]
-define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
+define amdgpu_kernel void @multi_foldable_use_fneg_src() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%b = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
@@ -59,7 +59,7 @@ define amdgpu_kernel void @multi_foldable_use_fneg_src() #0 {
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 4.0, [[MUL]]
; GCN-NOT: xor
; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @multi_use_fneg() #0 {
+define amdgpu_kernel void @multi_use_fneg() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%b = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
@@ -82,7 +82,7 @@ define amdgpu_kernel void @multi_use_fneg() #0 {
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, [[MUL0]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MUL0]], [[MUL0]]
; GCN: buffer_store_dword [[MUL1]]
-define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
+define amdgpu_kernel void @multi_foldable_use_fneg() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%b = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
@@ -101,7 +101,7 @@ define amdgpu_kernel void @multi_foldable_use_fneg() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_oeq_posk_f32:
; GCN: v_cmp_eq_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -114,7 +114,7 @@ define amdgpu_kernel void @test_setcc_fneg_oeq_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ogt_posk_f32:
; GCN: v_cmp_gt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -127,7 +127,7 @@ define amdgpu_kernel void @test_setcc_fneg_ogt_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_oge_posk_f32:
; GCN: v_cmp_ge_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -140,7 +140,7 @@ define amdgpu_kernel void @test_setcc_fneg_oge_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_olt_posk_f32:
; GCN: v_cmp_lt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -153,7 +153,7 @@ define amdgpu_kernel void @test_setcc_fneg_olt_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ole_posk_f32:
; GCN: v_cmp_le_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -166,7 +166,7 @@ define amdgpu_kernel void @test_setcc_fneg_ole_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_one_posk_f32:
; GCN: v_cmp_lg_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -179,7 +179,7 @@ define amdgpu_kernel void @test_setcc_fneg_one_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ueq_posk_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -192,7 +192,7 @@ define amdgpu_kernel void @test_setcc_fneg_ueq_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ugt_posk_f32:
; GCN: v_cmp_nle_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -205,7 +205,7 @@ define amdgpu_kernel void @test_setcc_fneg_ugt_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_uge_posk_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -218,7 +218,7 @@ define amdgpu_kernel void @test_setcc_fneg_uge_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ult_posk_f32:
; GCN: v_cmp_nge_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -231,7 +231,7 @@ define amdgpu_kernel void @test_setcc_fneg_ult_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_ule_posk_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -244,7 +244,7 @@ define amdgpu_kernel void @test_setcc_fneg_ule_posk_f32() #0 {
; GCN-LABEL: {{^}}test_setcc_fneg_une_posk_f32:
; GCN: v_cmp_neq_f32_e32 vcc, -4.0, v{{[0-9]+}}
-define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
+define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() nounwind {
%a = load volatile float, ptr addrspace(1) undef
%x = load volatile i32, ptr addrspace(1) undef
%y = load volatile i32, ptr addrspace(1) undef
@@ -254,5 +254,3 @@ define amdgpu_kernel void @test_setcc_fneg_une_posk_f32() #0 {
store volatile i32 %select, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
index 4432ac4a9e8ff8..9e83d8e3df3eb5 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
@@ -285,8 +285,6 @@ endif:
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index 6ab49382b90494..d21bf429436f2f 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN: s_cmp_eq_u32
; GCN: s_cmp_eq_u32
-define amdgpu_kernel void @setcc_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @setcc_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%result = icmp eq <2 x i32> %a, %b
%sext = sext <2 x i1> %result to <2 x i32>
store <2 x i32> %sext, ptr addrspace(1) %out
@@ -26,7 +26,7 @@ define amdgpu_kernel void @setcc_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x
; GCN: s_cmp_eq_u32
; GCN: s_cmp_eq_u32
; GCN: s_cmp_eq_u32
-define amdgpu_kernel void @setcc_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @setcc_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
%a = load <4 x i32>, ptr addrspace(1) %in
%b = load <4 x i32>, ptr addrspace(1) %b_ptr
@@ -43,7 +43,7 @@ define amdgpu_kernel void @setcc_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %
; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; GCN: v_cmp_eq_f32
-define amdgpu_kernel void @f32_oeq(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_oeq(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp oeq float %a, %b
%1 = sext i1 %0 to i32
@@ -54,7 +54,7 @@ entry:
; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; GCN: v_cmp_gt_f32
-define amdgpu_kernel void @f32_ogt(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ogt(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ogt float %a, %b
%1 = sext i1 %0 to i32
@@ -65,7 +65,7 @@ entry:
; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; GCN: v_cmp_ge_f32
-define amdgpu_kernel void @f32_oge(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_oge(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp oge float %a, %b
%1 = sext i1 %0 to i32
@@ -76,7 +76,7 @@ entry:
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; GCN: v_cmp_lt_f32
-define amdgpu_kernel void @f32_olt(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_olt(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp olt float %a, %b
%1 = sext i1 %0 to i32
@@ -87,7 +87,7 @@ entry:
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; GCN: v_cmp_le_f32
-define amdgpu_kernel void @f32_ole(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ole(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ole float %a, %b
%1 = sext i1 %0 to i32
@@ -103,7 +103,7 @@ entry:
; GCN: v_cmp_lg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_one(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_one(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp one float %a, %b
%1 = sext i1 %0 to i32
@@ -117,7 +117,7 @@ entry:
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_o_f32
-define amdgpu_kernel void @f32_ord(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ord(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ord float %a, %b
%1 = sext i1 %0 to i32
@@ -133,7 +133,7 @@ entry:
; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_ueq(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ueq(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ueq float %a, %b
%1 = sext i1 %0 to i32
@@ -146,7 +146,7 @@ entry:
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_ugt(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ugt(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ugt float %a, %b
%1 = sext i1 %0 to i32
@@ -160,7 +160,7 @@ entry:
; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_uge(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_uge(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp uge float %a, %b
%1 = sext i1 %0 to i32
@@ -174,7 +174,7 @@ entry:
; GCN: v_cmp_nge_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_ult(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ult(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ult float %a, %b
%1 = sext i1 %0 to i32
@@ -188,7 +188,7 @@ entry:
; GCN: v_cmp_ngt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f32_ule(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_ule(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp ule float %a, %b
%1 = sext i1 %0 to i32
@@ -199,7 +199,7 @@ entry:
; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; GCN: v_cmp_neq_f32
-define amdgpu_kernel void @f32_une(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_une(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp une float %a, %b
%1 = sext i1 %0 to i32
@@ -213,7 +213,7 @@ entry:
; R600: OR_INT
; R600: SETNE_INT
; GCN: v_cmp_u_f32
-define amdgpu_kernel void @f32_uno(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @f32_uno(ptr addrspace(1) %out, float %a, float %b) nounwind {
entry:
%0 = fcmp uno float %a, %b
%1 = sext i1 %0 to i32
@@ -228,7 +228,7 @@ entry:
; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; GCN: s_cmp_eq_u32
-define amdgpu_kernel void @i32_eq(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_eq(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp eq i32 %a, %b
%1 = sext i1 %0 to i32
@@ -239,7 +239,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; GCN: s_cmp_lg_u32
-define amdgpu_kernel void @i32_ne(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ne(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp ne i32 %a, %b
%1 = sext i1 %0 to i32
@@ -250,7 +250,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; GCN: s_cmp_gt_u32
-define amdgpu_kernel void @i32_ugt(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ugt(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp ugt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -261,7 +261,7 @@ entry:
; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; GCN: s_cmp_ge_u32
-define amdgpu_kernel void @i32_uge(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_uge(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp uge i32 %a, %b
%1 = sext i1 %0 to i32
@@ -272,7 +272,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; GCN: s_cmp_lt_u32
-define amdgpu_kernel void @i32_ult(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ult(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp ult i32 %a, %b
%1 = sext i1 %0 to i32
@@ -283,7 +283,7 @@ entry:
; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; GCN: s_cmp_le_u32
-define amdgpu_kernel void @i32_ule(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_ule(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp ule i32 %a, %b
%1 = sext i1 %0 to i32
@@ -294,7 +294,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; GCN: s_cmp_gt_i32
-define amdgpu_kernel void @i32_sgt(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sgt(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp sgt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -305,7 +305,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; GCN: s_cmp_ge_i32
-define amdgpu_kernel void @i32_sge(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sge(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp sge i32 %a, %b
%1 = sext i1 %0 to i32
@@ -316,7 +316,7 @@ entry:
; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; GCN: s_cmp_lt_i32
-define amdgpu_kernel void @i32_slt(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_slt(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp slt i32 %a, %b
%1 = sext i1 %0 to i32
@@ -327,7 +327,7 @@ entry:
; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; GCN: s_cmp_le_i32
-define amdgpu_kernel void @i32_sle(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @i32_sle(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%0 = icmp sle i32 %a, %b
%1 = sext i1 %0 to i32
@@ -344,7 +344,7 @@ entry:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
-define amdgpu_kernel void @v3i32_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) #0 {
+define amdgpu_kernel void @v3i32_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i32>, ptr addrspace(1) %ptra, i32 %tid
%gep.b = getelementptr <3 x i32>, ptr addrspace(1) %ptrb, i32 %tid
@@ -365,7 +365,7 @@ define amdgpu_kernel void @v3i32_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptr
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
-define amdgpu_kernel void @v3i8_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) #0 {
+define amdgpu_kernel void @v3i8_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.a = getelementptr <3 x i8>, ptr addrspace(1) %ptra, i32 %tid
%gep.b = getelementptr <3 x i8>, ptr addrspace(1) %ptrb, i32 %tid
@@ -381,7 +381,7 @@ define amdgpu_kernel void @v3i8_eq(ptr addrspace(1) %out, ptr addrspace(1) %ptra
; Make sure we don't try to emit i1 setcc ops
; FUNC-LABEL: setcc-i1
; GCN: s_bitcmp0_b32 s{{[0-9]+}}, 0
-define amdgpu_kernel void @setcc-i1(i32 %in) #0 {
+define amdgpu_kernel void @setcc-i1(i32 %in) nounwind {
%and = and i32 %in, 1
%cmp = icmp eq i32 %and, 0
br i1 %cmp, label %endif, label %if
@@ -395,7 +395,7 @@ endif:
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_or_b64 s[2:3], [[A]], [[B]]
-define amdgpu_kernel void @setcc-i1-and-xor(ptr addrspace(1) %out, float %cond) #0 {
+define amdgpu_kernel void @setcc-i1-and-xor(ptr addrspace(1) %out, float %cond) nounwind {
bb0:
%tmp5 = fcmp oge float %cond, 0.000000e+00
%tmp7 = fcmp ole float %cond, 1.000000e+00
@@ -462,5 +462,3 @@ entry:
store <4 x float> %select.val.1, ptr addrspace(1) %r
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/setcc64.ll b/llvm/test/CodeGen/AMDGPU/setcc64.ll
index 438d8d22947a45..aa86143234ca0d 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc64.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc64.ll
@@ -9,7 +9,7 @@
; GCN-LABEL: {{^}}f64_oeq:
; GCN: v_cmp_eq_f64
-define amdgpu_kernel void @f64_oeq(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_oeq(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp oeq double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -19,7 +19,7 @@ entry:
; GCN-LABEL: {{^}}f64_ogt:
; GCN: v_cmp_gt_f64
-define amdgpu_kernel void @f64_ogt(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ogt(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ogt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -29,7 +29,7 @@ entry:
; GCN-LABEL: {{^}}f64_oge:
; GCN: v_cmp_ge_f64
-define amdgpu_kernel void @f64_oge(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_oge(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp oge double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -39,7 +39,7 @@ entry:
; GCN-LABEL: {{^}}f64_olt:
; GCN: v_cmp_lt_f64
-define amdgpu_kernel void @f64_olt(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_olt(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp olt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -49,7 +49,7 @@ entry:
; GCN-LABEL: {{^}}f64_ole:
; GCN: v_cmp_le_f64
-define amdgpu_kernel void @f64_ole(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ole(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ole double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -60,7 +60,7 @@ entry:
; GCN-LABEL: {{^}}f64_one:
; GCN: v_cmp_lg_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_one(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_one(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp one double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -70,7 +70,7 @@ entry:
; GCN-LABEL: {{^}}f64_ord:
; GCN: v_cmp_o_f64
-define amdgpu_kernel void @f64_ord(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ord(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ord double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -81,7 +81,7 @@ entry:
; GCN-LABEL: {{^}}f64_ueq:
; GCN: v_cmp_nlg_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_ueq(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ueq(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ueq double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -93,7 +93,7 @@ entry:
; GCN: v_cmp_nle_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_ugt(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ugt(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ugt double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -104,7 +104,7 @@ entry:
; GCN-LABEL: {{^}}f64_uge:
; GCN: v_cmp_nlt_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_uge(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_uge(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp uge double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -115,7 +115,7 @@ entry:
; GCN-LABEL: {{^}}f64_ult:
; GCN: v_cmp_nge_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_ult(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ult(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ult double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -126,7 +126,7 @@ entry:
; GCN-LABEL: {{^}}f64_ule:
; GCN: v_cmp_ngt_f64_e32 vcc
; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-define amdgpu_kernel void @f64_ule(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_ule(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp ule double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -136,7 +136,7 @@ entry:
; GCN-LABEL: {{^}}f64_une:
; GCN: v_cmp_neq_f64
-define amdgpu_kernel void @f64_une(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_une(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp une double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -146,7 +146,7 @@ entry:
; GCN-LABEL: {{^}}f64_uno:
; GCN: v_cmp_u_f64
-define amdgpu_kernel void @f64_uno(ptr addrspace(1) %out, double %a, double %b) #0 {
+define amdgpu_kernel void @f64_uno(ptr addrspace(1) %out, double %a, double %b) nounwind {
entry:
%tmp0 = fcmp uno double %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -161,7 +161,7 @@ entry:
; GCN-LABEL: {{^}}i64_eq:
; SI: v_cmp_eq_u64
; VI: s_cmp_eq_u64
-define amdgpu_kernel void @i64_eq(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_eq(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp eq i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -172,7 +172,7 @@ entry:
; GCN-LABEL: {{^}}i64_ne:
; SI: v_cmp_ne_u64
; VI: s_cmp_lg_u64
-define amdgpu_kernel void @i64_ne(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ne(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp ne i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -182,7 +182,7 @@ entry:
; GCN-LABEL: {{^}}i64_ugt:
; GCN: v_cmp_gt_u64
-define amdgpu_kernel void @i64_ugt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ugt(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp ugt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -192,7 +192,7 @@ entry:
; GCN-LABEL: {{^}}i64_uge:
; GCN: v_cmp_ge_u64
-define amdgpu_kernel void @i64_uge(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_uge(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp uge i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -202,7 +202,7 @@ entry:
; GCN-LABEL: {{^}}i64_ult:
; GCN: v_cmp_lt_u64
-define amdgpu_kernel void @i64_ult(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ult(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp ult i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -212,7 +212,7 @@ entry:
; GCN-LABEL: {{^}}i64_ule:
; GCN: v_cmp_le_u64
-define amdgpu_kernel void @i64_ule(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_ule(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp ule i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -222,7 +222,7 @@ entry:
; GCN-LABEL: {{^}}i64_sgt:
; GCN: v_cmp_gt_i64
-define amdgpu_kernel void @i64_sgt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sgt(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp sgt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -232,7 +232,7 @@ entry:
; GCN-LABEL: {{^}}i64_sge:
; GCN: v_cmp_ge_i64
-define amdgpu_kernel void @i64_sge(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sge(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp sge i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -242,7 +242,7 @@ entry:
; GCN-LABEL: {{^}}i64_slt:
; GCN: v_cmp_lt_i64
-define amdgpu_kernel void @i64_slt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_slt(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp slt i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -252,7 +252,7 @@ entry:
; GCN-LABEL: {{^}}i64_sle:
; GCN: v_cmp_le_i64
-define amdgpu_kernel void @i64_sle(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @i64_sle(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
entry:
%tmp0 = icmp sle i64 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -265,7 +265,7 @@ entry:
; CGV: v_cndmask
; SI: v_cmp_eq_u64
; VI: s_cmp_eq_u64
-define amdgpu_kernel void @i128_sle(ptr addrspace(1) %out, i128 %a, i128 %b) #0 {
+define amdgpu_kernel void @i128_sle(ptr addrspace(1) %out, i128 %a, i128 %b) nounwind {
entry:
%tmp0 = icmp sle i128 %a, %b
%tmp1 = sext i1 %tmp0 to i32
@@ -276,12 +276,10 @@ entry:
; GCN-LABEL: {{^}}i128_eq_const:
; SI: v_cmp_eq_u64
; VI: s_cmp_eq_u64
-define amdgpu_kernel void @i128_eq_const(ptr addrspace(1) %out, i128 %a) #0 {
+define amdgpu_kernel void @i128_eq_const(ptr addrspace(1) %out, i128 %a) nounwind {
entry:
%tmp0 = icmp eq i128 %a, 85070591730234615865843651857942052992
%tmp1 = sext i1 %tmp0 to i32
store i32 %tmp1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
index 2169ee117cbaaf..95be64ae8e3b25 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
@@ -127,7 +127,4 @@ define amdgpu_kernel void @sext_i32_to_i64_divergent(ptr addrspace(1) %out, i32
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
index 38672da3c647b0..f07f9a4a86e05d 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -15,7 +15,7 @@
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR * [[ADDR]]
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
-define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) nounwind {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
store i32 %sext, ptr addrspace(1) %out
@@ -32,7 +32,7 @@ define amdgpu_kernel void @sext_in_reg_i1_i32(ptr addrspace(1) %out, i32 %in) #0
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 24
%ashr = ashr i32 %shl, 24
@@ -50,7 +50,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32(ptr addrspace(1) %out, i32 %a,
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b ; add to prevent folding into extload
%shl = shl i32 %c, 16
%ashr = ashr i32 %shl, 16
@@ -68,7 +68,7 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i32(ptr addrspace(1) %out, i32 %a,
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) nounwind {
%c = add <1 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <1 x i32> %c, <i32 24>
%ashr = ashr <1 x i32> %shl, <i32 24>
@@ -82,7 +82,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_v1i32(ptr addrspace(1) %out, <1 x i
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
%ashr = ashr i64 %shl, 63
@@ -96,7 +96,7 @@ define amdgpu_kernel void @sext_in_reg_i1_to_i64(ptr addrspace(1) %out, i64 %a,
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
%ashr = ashr i64 %shl, 56
@@ -111,7 +111,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i64(ptr addrspace(1) %out, i64 %a,
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
%ashr = ashr i64 %shl, 48
@@ -125,7 +125,7 @@ define amdgpu_kernel void @sext_in_reg_i16_to_i64(ptr addrspace(1) %out, i64 %a,
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; GCN: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
%ashr = ashr i64 %shl, 32
@@ -140,7 +140,7 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a,
; XGCN: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
-; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) #0 {
+; define amdgpu_kernel void @sext_in_reg_i8_to_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) nounwind {
; %c = add <1 x i64> %a, %b
; %shl = shl <1 x i64> %c, <i64 56>
; %ashr = ashr <1 x i64> %shl, <i64 56>
@@ -160,7 +160,7 @@ define amdgpu_kernel void @sext_in_reg_i32_to_i64(ptr addrspace(1) %out, i64 %a,
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -187,7 +187,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(ptr addrspace(1) %out, ptr ad
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -214,7 +214,7 @@ define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(ptr addrspace(1) %out, ptr ad
; SI: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -238,7 +238,7 @@ define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(ptr addrspace(1) %out, ptr a
; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[SHR]]]
-define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -264,7 +264,7 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(ptr addrspace(1) %out, ptr a
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%c = add i32 %a, %b
%x = shl i32 %c, 6
%y = ashr i32 %x, 7
@@ -287,7 +287,7 @@ define amdgpu_kernel void @sext_in_reg_i1_in_i32_other_amount(ptr addrspace(1) %
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
@@ -305,7 +305,7 @@ define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 31, i32 31>
%ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
@@ -326,7 +326,7 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i32(ptr addrspace(1) %out, <2 x
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
%ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
@@ -343,7 +343,7 @@ define amdgpu_kernel void @sext_in_reg_v4i1_to_v4i32(ptr addrspace(1) %out, <4 x
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 24, i32 24>
%ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
@@ -364,7 +364,7 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i32(ptr addrspace(1) %out, <2 x
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) nounwind {
%c = add <4 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -381,7 +381,7 @@ define amdgpu_kernel void @sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, <4 x
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
-define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i32> %c, <i32 16, i32 16>
%ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
@@ -390,7 +390,7 @@ define amdgpu_kernel void @sext_in_reg_v2i16_to_v2i32(ptr addrspace(1) %out, <2
}
; FUNC-LABEL: {{^}}testcase:
-define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) nounwind {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
@@ -402,7 +402,7 @@ define amdgpu_kernel void @testcase(ptr addrspace(1) %out, i8 %a) #0 {
}
; FUNC-LABEL: {{^}}testcase_3:
-define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, i8 %a) #0 {
+define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, i8 %a) nounwind {
%and_a_1 = and i8 %a, 1
%cmp_eq = icmp eq i8 %and_a_1, 0
%cmp_slt = icmp slt i8 %a, 0
@@ -418,7 +418,7 @@ define amdgpu_kernel void @testcase_3(ptr addrspace(1) %out, i8 %a) #0 {
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
%loada = load <4 x i32>, ptr addrspace(1) %a, align 16
%loadb = load <4 x i32>, ptr addrspace(1) %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
@@ -431,7 +431,7 @@ define amdgpu_kernel void @vgpr_sext_in_reg_v4i8_to_v4i32(ptr addrspace(1) %out,
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; GCN: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
+define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
%loada = load <4 x i32>, ptr addrspace(1) %a, align 16
%loadb = load <4 x i32>, ptr addrspace(1) %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
@@ -446,7 +446,7 @@ define amdgpu_kernel void @vgpr_sext_in_reg_v4i16_to_v4i32(ptr addrspace(1) %out
; GCN: v_max_i32
; GCN-NOT: bfe
; GCN: buffer_store_short
-define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %src) #0 {
+define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %src) nounwind {
%tmp5 = load i8, ptr addrspace(1) %src, align 1
%tmp2 = sext i8 %tmp5 to i32
%tmp2.5 = icmp sgt i32 %tmp2, 0
@@ -472,7 +472,7 @@ define amdgpu_kernel void @sext_in_reg_to_illegal_type(ptr addrspace(1) nocaptur
; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -502,7 +502,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(ptr addrspace(1) %ou
; SI: buffer_store_dwordx2 v[[[RESULT_LO]]:[[RESULT_HI]]]
; GFX89: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, i64 %s.val) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
@@ -528,7 +528,7 @@ define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(ptr addrspace(1) %o
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
-define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
%ld = load i32, ptr addrspace(4) %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 15
@@ -547,7 +547,7 @@ define amdgpu_kernel void @s_sext_in_reg_i1_i16(ptr addrspace(1) %out, ptr addrs
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
-define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
%ld = load i32, ptr addrspace(4) %ptr
%in = trunc i32 %ld to i16
%shl = shl i16 %in, 14
@@ -561,7 +561,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16(ptr addrspace(1) %out, ptr addrs
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
-define amdgpu_kernel void @v_sext_in_reg_i1_i16(ptr addrspace(3) %out, ptr addrspace(1) %ptr) #0 {
+define amdgpu_kernel void @v_sext_in_reg_i1_i16(ptr addrspace(3) %out, ptr addrspace(1) %ptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr i16, ptr addrspace(1) %ptr, i32 %tid
%out.gep = getelementptr i16, ptr addrspace(3) %out, i32 %tid
@@ -608,7 +608,7 @@ define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(ptr addrspace(3) %out, p
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %in) nounwind {
%shl = shl i16 %in, 14
%sext = ashr i16 %shl, 14
store i16 %sext, ptr addrspace(1) %out
@@ -625,7 +625,7 @@ define amdgpu_kernel void @s_sext_in_reg_i2_i16_arg(ptr addrspace(1) %out, i16 %
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %in) nounwind {
%shl = shl i16 %in, 8
%sext = ashr i16 %shl, 8
store i16 %sext, ptr addrspace(1) %out
@@ -642,7 +642,7 @@ define amdgpu_kernel void @s_sext_in_reg_i8_i16_arg(ptr addrspace(1) %out, i16 %
; GFX89: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
; GFX89: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
; GFX89: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
-define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16 %in) #0 {
+define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16 %in) nounwind {
%shl = shl i16 %in, 1
%sext = ashr i16 %shl, 1
store i16 %sext, ptr addrspace(1) %out
@@ -653,7 +653,7 @@ define amdgpu_kernel void @s_sext_in_reg_i15_i16_arg(ptr addrspace(1) %out, i16
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 15, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 15, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 15, i16 15>
%ashr = ashr <2 x i16> %shl, <i16 15, i16 15>
@@ -668,7 +668,7 @@ define amdgpu_kernel void @sext_in_reg_v2i1_to_v2i16(ptr addrspace(1) %out, <2 x
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 15, v{{[0-9]+}}
-define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) nounwind {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 15, i16 15, i16 15>
%ashr = ashr <3 x i16> %shl, <i16 15, i16 15, i16 15>
@@ -680,7 +680,7 @@ define amdgpu_kernel void @sext_in_reg_v3i1_to_v3i16(ptr addrspace(1) %out, <3 x
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 14, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 14, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 14, i16 14>
%ashr = ashr <2 x i16> %shl, <i16 14, i16 14>
@@ -692,7 +692,7 @@ define amdgpu_kernel void @sext_in_reg_v2i2_to_v2i16(ptr addrspace(1) %out, <2 x
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]]
; GFX9: v_pk_lshlrev_b16 [[SHL:v[0-9]+]], 8, [[ADD]]
; GFX9: v_pk_ashrrev_i16 [[SRA:v[0-9]+]], 8, [[SHL]]
-define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind {
%c = add <2 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <2 x i16> %c, <i16 8, i16 8>
%ashr = ashr <2 x i16> %shl, <i16 8, i16 8>
@@ -707,7 +707,7 @@ define amdgpu_kernel void @sext_in_reg_v2i8_to_v2i16(ptr addrspace(1) %out, <2 x
; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
-define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) #0 {
+define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x i16> %a, <3 x i16> %b) nounwind {
%c = add <3 x i16> %a, %b ; add to prevent folding into extload
%shl = shl <3 x i16> %c, <i16 8, i16 8, i16 8>
%ashr = ashr <3 x i16> %shl, <i16 8, i16 8, i16 8>
@@ -715,7 +715,4 @@ define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index b67ecc2f9d13c8..df8f6171e43968 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -136,7 +136,7 @@ define amdgpu_kernel void @sgpr_if_else_valu_br(ptr addrspace(1) %out, float %a,
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%tid_f = uitofp i32 %tid to float
%tmp1 = fcmp ueq float %tid_f, 0.0
br i1 %tmp1, label %if, label %else
@@ -199,7 +199,7 @@ define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(ptr addrspace(1) %out, p
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%tmp1 = icmp eq i32 %tid, 0
br i1 %tmp1, label %if, label %else
@@ -222,6 +222,4 @@ endif:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index a3cb3cfba55522..30e23fb43ea1d0 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -5,7 +5,7 @@
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: ; %bb.1: ; %ELSE
; CHECK: s_xor_b32 s{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg, !tbaa !0
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 0)
@@ -22,13 +22,13 @@ ELSE: ; preds = %main_body
ENDIF: ; preds = %ELSE, %main_body
%temp.0 = phi float [ %tmp26, %ELSE ], [ %tmp21, %main_body ]
%tmp27 = fadd float %temp.0, %tmp23
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp27, float %tmp27, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) nounwind
ret void
}
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) nounwind readnone {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg, !tbaa !0
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 16, i32 0)
@@ -52,32 +52,32 @@ main_body:
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) #1
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) #1
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg3) nounwind readnone
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg3) nounwind readnone
%i.i19 = extractelement <2 x i32> %arg5, i32 0
%j.i20 = extractelement <2 x i32> %arg5, i32 1
%i.f.i21 = bitcast i32 %i.i19 to float
%j.f.i22 = bitcast i32 %j.i20 to float
- %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) #1
- %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) #1
+ %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 1, i32 0, i32 %arg3) nounwind readnone
+ %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 1, i32 0, i32 %arg3) nounwind readnone
%i.i13 = extractelement <2 x i32> %arg5, i32 0
%j.i14 = extractelement <2 x i32> %arg5, i32 1
%i.f.i15 = bitcast i32 %i.i13 to float
%j.f.i16 = bitcast i32 %j.i14 to float
- %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) #1
- %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) #1
+ %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 1, i32 %arg3) nounwind readnone
+ %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 1, i32 %arg3) nounwind readnone
%i.i7 = extractelement <2 x i32> %arg5, i32 0
%j.i8 = extractelement <2 x i32> %arg5, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) #1
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) #1
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 1, i32 %arg3) nounwind readnone
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 1, i32 %arg3) nounwind readnone
%i.i1 = extractelement <2 x i32> %arg5, i32 0
%j.i2 = extractelement <2 x i32> %arg5, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) nounwind readnone
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) nounwind readnone
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0)
%tmp50 = extractelement <4 x float> %tmp1, i32 2
@@ -159,13 +159,13 @@ ENDIF24: ; preds = %IF25, %ENDIF
%tmp112 = fmul float %tmp111, %tmp106
%tmp113 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp108, float %tmp110)
%tmp115 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp112, float 1.000000e+00)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp113, <2 x half> %tmp115, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp113, <2 x half> %tmp115, i1 true, i1 true) nounwind
ret void
}
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg, !tbaa !0
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 0)
@@ -191,7 +191,7 @@ LOOP: ; preds = %ENDIF, %main_body
br i1 %tmp33, label %IF, label %ENDIF
IF: ; preds = %LOOP
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp4.0, float %temp5.0, float %temp6.0, float 1.000000e+00, i1 true, i1 true) nounwind
ret void
ENDIF: ; preds = %LOOP
@@ -217,7 +217,7 @@ ENDIF: ; preds = %LOOP
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v[[[SAMPLE_LO]]:[[SAMPLE_HI]]]
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) nounwind {
entry:
%tmp21 = load <4 x i32>, ptr addrspace(4) %arg, !tbaa !0
%tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 16, i32 0)
@@ -245,7 +245,7 @@ endif: ; preds = %else, %if
%val.0 = phi float [ %val.if.0, %if ], [ %val.else.0, %else ]
%val.1 = phi float [ %val.if.1, %if ], [ %val.else.1, %else ]
%val.2 = phi float [ %val.if.2, %if ], [ %val.else.2, %else ]
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %val.0, float %val.1, float %val.2, float 0.000000e+00, i1 true, i1 true) nounwind
ret void
}
@@ -278,7 +278,7 @@ endif: ; preds = %if1, %if0, %entry
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define amdgpu_ps void @copy2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) nounwind {
entry:
br label %LOOP68
@@ -292,7 +292,7 @@ LOOP68: ; preds = %ENDIF69, %entry
IF70: ; preds = %LOOP68
%q = icmp ne i32 %l, 13
%temp.8 = select i1 %q, float 1.000000e+00, float 0.000000e+00
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %temp.8, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, i1 true, i1 true) nounwind
ret void
ENDIF69: ; preds = %LOOP68
@@ -316,7 +316,7 @@ ENDIF69: ; preds = %LOOP68
; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]]
; CHECK: s_branch
-define amdgpu_ps void @sample_rsrc(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) nounwind {
bb:
%tmp22 = load <4 x i32>, ptr addrspace(4) %arg1, !tbaa !3
%tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp22, i32 16, i32 0)
@@ -326,14 +326,14 @@ bb:
%j.i = extractelement <2 x i32> %arg7, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #0
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #0
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) nounwind
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) nounwind
%i.i1 = extractelement <2 x i32> %arg7, i32 0
%j.i2 = extractelement <2 x i32> %arg7, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #0
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #0
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) nounwind
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) nounwind
%tmp31 = bitcast float %tmp23 to i32
%tmp36 = icmp ne i32 %tmp31, 0
br i1 %tmp36, label %bb38, label %bb80
@@ -356,56 +356,52 @@ bb80: ; preds = %bb
bb71: ; preds = %bb80, %bb38
%tmp72 = phi <4 x float> [ %tmp2, %bb38 ], [ %tmp3, %bb80 ]
%tmp88 = extractelement <4 x float> %tmp72, i32 0
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) nounwind
ret void
}
; Check the resource descriptor is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_srsrc_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_srsrc_sgpr(ptr addrspace(4) inreg %arg) #0 {
+define amdgpu_ps void @mimg_srsrc_sgpr(ptr addrspace(4) inreg %arg) nounwind {
bb:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind
%tmp7 = getelementptr [34 x <8 x i32>], ptr addrspace(4) %arg, i32 0, i32 %tid
%tmp8 = load <8 x i32>, ptr addrspace(4) %tmp7, align 32, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) nounwind
ret void
}
; Check the sampler is stored in an sgpr.
; CHECK-LABEL: {{^}}mimg_ssamp_sgpr:
; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @mimg_ssamp_sgpr(ptr addrspace(4) inreg %arg) #0 {
+define amdgpu_ps void @mimg_ssamp_sgpr(ptr addrspace(4) inreg %arg) nounwind {
bb:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind
%tmp7 = getelementptr [17 x <4 x i32>], ptr addrspace(4) %arg, i32 0, i32 %tid
%tmp8 = load <4 x i32>, ptr addrspace(4) %tmp7, align 16, !tbaa !0
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) nounwind
ret void
}
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.pow.f32(float, float) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
+declare float @llvm.exp2.f32(float) nounwind readnone
+declare float @llvm.pow.f32(float, float) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
index 29622d3fd0f1b5..0a05ad0cb03e47 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
@@ -4,54 +4,51 @@
; which was due to incorrect book-keeping of removed dead frame indices.
; CHECK-LABEL: {{^}}kernel0:
-define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "", "~{v[8:15]}" () #0
- call void asm sideeffect "", "~{v[16:19]}"() #0
- call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
- %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
- %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
- %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
- %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) #0
- call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) #0
+define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="10,10" {
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "", "~{v[8:15]}" () nounwind
+ call void asm sideeffect "", "~{v[16:19]}"() nounwind
+ call void asm sideeffect "", "~{v[20:21]}"() nounwind
+ call void asm sideeffect "", "~{v22}"() nounwind
+ %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) nounwind
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) nounwind
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index d430ba758572d6..57268ce59aafe3 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -4,7 +4,7 @@
; The first 64 SGPR spills can go to a VGPR, but there isn't a second
; so some spills must be to memory. The last 16 element spill runs out of lanes at the 15th element.
-define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
+define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) nounwind "amdgpu-waves-per-eu"="10,10" {
; GCN-LABEL: partial_no_vgprs_last_sgpr_spill:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 s0, s0, s13
@@ -221,34 +221,31 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "", "~{v[8:15]}" () #0
- call void asm sideeffect "", "~{v[16:19]}"() #0
- call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "", "~{v[8:15]}" () nounwind
+ call void asm sideeffect "", "~{v[16:19]}"() nounwind
+ call void asm sideeffect "", "~{v[20:21]}"() nounwind
+ call void asm sideeffect "", "~{v22}"() nounwind
- %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
- %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) nounwind
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) nounwind
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) nounwind
br label %ret
ret:
ret void
}
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 6a2532147f886c..1c963c5c6295ea 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -2,11 +2,9 @@
# RUN: llc -mtriple=amdgcn-unknown-amdpal -mcpu=gfx1030 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs --stress-regalloc=5 -o - %s | FileCheck -check-prefix GCN %s
--- |
- define amdgpu_gfx [13 x i32] @test_main() #0 {
+ define amdgpu_gfx [13 x i32] @test_main() alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" {
ret [13 x i32] poison
}
-
- attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index f19b0a5f53e95a..d6ff0dffc834c1 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -7,15 +7,13 @@
--- |
- define amdgpu_kernel void @check_spill() #0 {
+ define amdgpu_kernel void @check_spill() "frame-pointer"="all" {
ret void
}
- define amdgpu_kernel void @check_reload() #0 {
+ define amdgpu_kernel void @check_reload() "frame-pointer"="all" {
ret void
}
-
- attributes #0 = { "frame-pointer"="all" }
...
---
name: check_spill
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index 764f4942cbd03c..bf980d1d717236 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-define void @child_function() #0 {
+define void @child_function() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: child_function:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
- call void asm sideeffect "", "~{vcc}" () #0
+ call void asm sideeffect "", "~{vcc}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
ret void
}
-define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
+define void @spill_sgpr_with_no_lower_vgpr_available() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -302,12 +302,12 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253},~{v254}" () #0
+ ,~{v250},~{v251},~{v252},~{v253},~{v254}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
call void @child_function()
ret void
}
-define void @spill_to_lowest_available_vgpr() #0 {
+define void @spill_to_lowest_available_vgpr() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_to_lowest_available_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -595,12 +595,12 @@ define void @spill_to_lowest_available_vgpr() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253}" () #0
+ ,~{v250},~{v251},~{v252},~{v253}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
call void @child_function()
ret void
}
-define void @spill_sgpr_with_sgpr_uses() #0 {
+define void @spill_sgpr_with_sgpr_uses() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_with_sgpr_uses:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -893,21 +893,21 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253}" () #0
+ ,~{v250},~{v251},~{v252},~{v253}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
- %sgpr = call i32 asm sideeffect "; def $0", "=s" () #0
+ %sgpr = call i32 asm sideeffect "; def $0", "=s" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
%cmp = icmp eq i32 undef, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(i32 %sgpr) #0
+ call void asm sideeffect "; use $0", "s"(i32 %sgpr) nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
br label %ret
ret:
ret void
}
-define void @spill_sgpr_with_tail_call() #0 {
+define void @spill_sgpr_with_tail_call() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_with_tail_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1175,12 +1175,12 @@ define void @spill_sgpr_with_tail_call() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253},~{v254}" () #0
+ ,~{v250},~{v251},~{v252},~{v253},~{v254}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
musttail call void @child_function()
ret void
}
-define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_no_free_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1480,16 +1480,16 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+ ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
call void asm sideeffect "",
- "~{s34},~{s35},~{s36},~{s37}" () #0
+ "~{s34},~{s35},~{s36},~{s37}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
store <4 x i32> %a, ptr addrspace(1) %out
ret void
}
-define internal void @child_function_ipra() #0 {
+define internal void @child_function_ipra() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: child_function_ipra:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1522,11 +1522,11 @@ define internal void @child_function_ipra() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+ ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
ret void
}
-define void @spill_sgpr_no_free_vgpr_ipra() #0 {
+define void @spill_sgpr_no_free_vgpr_ipra() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1808,7 +1808,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
ret void
}
-define internal void @child_function_ipra_tail_call() #0 {
+define internal void @child_function_ipra_tail_call() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: child_function_ipra_tail_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2066,11 +2066,11 @@ define internal void @child_function_ipra_tail_call() #0 {
,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
- ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+ ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256"
ret void
}
-define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
+define void @spill_sgpr_no_free_vgpr_ipra_tail_call() nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" {
; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2081,6 +2081,3 @@ define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
tail call void @child_function_ipra_tail_call()
ret void
}
-
-
-attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/sgprcopies.ll b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
index 5a66bff1ce61cd..42c317601298bd 100644
--- a/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
@@ -5,7 +5,7 @@
; GCN: v_add
define amdgpu_kernel void @checkTwoBlocksWithUniformBranch(ptr addrspace(1) nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) {
entry:
- %conv = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %conv = call i32 @llvm.amdgcn.workitem.id.x() readnone
%rem = urem i32 %conv, %width
%div = udiv i32 %conv, %width
%conv1 = sitofp i32 %rem to float
@@ -27,10 +27,10 @@ for.body: ; preds = %for.body.preheader,
%iter_val = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%y_val = phi float [ %call9, %for.body ], [ %y, %for.body.preheader ]
%sub = fsub float -0.000000e+00, %y_val
- %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) #1
- %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) #1
+ %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) readnone
+ %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) readnone
%mul = fmul float %x_val, 2.000000e+00
- %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) #1
+ %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) readnone
%inc = add nuw i32 %iter_val, 1
%mul3 = fmul float %call9, %call9
%0 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3)
@@ -51,8 +51,5 @@ for.end: ; preds = %for.end.loopexit, %
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll b/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
index e0308bc56d172a..d2a40465c812e2 100644
--- a/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
+++ b/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
@@ -36,8 +36,6 @@ define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4
}
; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
+declare float @llvm.r600.dot4(<4 x float>, <4 x float>) readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 21fcd3cd0dcd61..fb8d2f467702bb 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Extract the high bit of the 1st quarter
-define amdgpu_kernel void @v_uextract_bit_31_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-LABEL: v_uextract_bit_31_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -34,7 +34,7 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(ptr addrspace(1) %out, ptr add
}
; Extract the high bit of the 2nd quarter
-define amdgpu_kernel void @v_uextract_bit_63_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-LABEL: v_uextract_bit_63_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -65,7 +65,7 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(ptr addrspace(1) %out, ptr add
}
; Extract the high bit of the 3rd quarter
-define amdgpu_kernel void @v_uextract_bit_95_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_95_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-LABEL: v_uextract_bit_95_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(ptr addrspace(1) %out, ptr add
}
; Extract the high bit of the 4th quarter
-define amdgpu_kernel void @v_uextract_bit_127_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_127_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-LABEL: v_uextract_bit_127_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -129,7 +129,7 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(ptr addrspace(1) %out, ptr ad
}
; Spans more than 2 dword boundaries
-define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GCN-LABEL: v_uextract_bit_34_100_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -159,9 +159,6 @@ define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index bdc607552a0dfb..8ed0c256751d82 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -9,7 +9,7 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -27,7 +27,7 @@ define amdgpu_kernel void @v_uextract_bit_31_i64(ptr addrspace(1) %out, ptr addr
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -43,7 +43,7 @@ define amdgpu_kernel void @v_uextract_bit_63_i64(ptr addrspace(1) %out, ptr addr
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -59,7 +59,7 @@ define amdgpu_kernel void @v_uextract_bit_1_i64(ptr addrspace(1) %out, ptr addrs
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -76,7 +76,7 @@ define amdgpu_kernel void @v_uextract_bit_20_i64(ptr addrspace(1) %out, ptr addr
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -93,7 +93,7 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addr
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -109,7 +109,7 @@ define amdgpu_kernel void @v_uextract_bit_33_i64(ptr addrspace(1) %out, ptr addr
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_21_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_21_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -125,7 +125,7 @@ define amdgpu_kernel void @v_uextract_bit_20_21_i64(ptr addrspace(1) %out, ptr a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_30_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_30_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -141,7 +141,7 @@ define amdgpu_kernel void @v_uextract_bit_1_30_i64(ptr addrspace(1) %out, ptr ad
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -160,7 +160,7 @@ define amdgpu_kernel void @v_uextract_bit_1_31_i64(ptr addrspace(1) %out, ptr ad
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -177,7 +177,7 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64(ptr addrspace(1) %out, ptr a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -195,7 +195,7 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(ptr addrspace(1) %out, ptr a
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_30_60_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_30_60_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -212,7 +212,7 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(ptr addrspace(1) %out, ptr a
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -228,7 +228,7 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(ptr addrspace(1) %out, ptr a
; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -244,7 +244,7 @@ define amdgpu_kernel void @v_uextract_bit_31_63_i64(ptr addrspace(1) %out, ptr a
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
-define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
@@ -260,7 +260,7 @@ define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(ptr addrspace(1) %out
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
@@ -276,7 +276,7 @@ define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(ptr addrspace(1) %out,
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
@@ -294,7 +294,7 @@ define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
@@ -315,7 +315,7 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(ptr addrspace(1) %
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
-define amdgpu_kernel void @and_not_mask_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @and_not_mask_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -336,7 +336,7 @@ define amdgpu_kernel void @and_not_mask_i64(ptr addrspace(1) %out, ptr addrspace
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -356,7 +356,7 @@ define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(ptr addrspac
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: buffer_store_dwordx2 v[[[SHR]]:[[ZERO_SHR]]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO_BFE]]]
-define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
@@ -374,7 +374,7 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(ptr addrspac
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
-define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) nounwind {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
%out0.gep = getelementptr i64, ptr addrspace(1) %out0, i32 %id.x
@@ -390,9 +390,6 @@ define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr add
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index c440392153adbd..d9c8f8f4faab3a 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -3,9 +3,9 @@
; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=VI
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefixes=EG
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
define amdgpu_kernel void @shl_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i32:
@@ -457,7 +457,7 @@ define amdgpu_kernel void @shl_i16_computed_amount(ptr addrspace(1) %out, ptr ad
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%b_ptr = getelementptr i16, ptr addrspace(1) %gep, i16 1
@@ -616,7 +616,7 @@ define amdgpu_kernel void @shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG-NEXT: OR_INT T0.X, PV.W, PS,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i32 %tid
%b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
@@ -738,7 +738,7 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
@@ -1123,7 +1123,7 @@ define amdgpu_kernel void @v_shl_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
; EG-NEXT: LSHR T2.X, PV.W, literal.x,
; EG-NEXT: MOV * T1.Y, T0.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
%a = load i64, ptr addrspace(1) %gep.in
@@ -2204,5 +2204,3 @@ define void @shl_or_k_two_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i
store i32 %tmp0, ptr addrspace(1) %out1
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index b81af3eb838f1f..08b0014e1a1734 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_shl_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_shl_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) nounwind {
; GFX9-LABEL: s_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -87,7 +87,7 @@ define amdgpu_kernel void @s_shl_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2
ret void
}
-define amdgpu_kernel void @v_shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -175,7 +175,7 @@ define amdgpu_kernel void @v_shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @shl_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
; GFX9-LABEL: shl_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -268,7 +268,7 @@ define amdgpu_kernel void @shl_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @shl_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) nounwind {
; GFX9-LABEL: shl_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -361,7 +361,7 @@ define amdgpu_kernel void @shl_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @shl_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @shl_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: shl_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -447,7 +447,7 @@ define amdgpu_kernel void @shl_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @shl_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: shl_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -529,7 +529,7 @@ define amdgpu_kernel void @shl_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define amdgpu_kernel void @v_shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: v_shl_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -630,7 +630,7 @@ define amdgpu_kernel void @v_shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX9-LABEL: shl_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -725,7 +725,4 @@ define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
index 945b92a3662efb..ebbfcabf2964a0 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=SI %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Test with inline immediate
@@ -9,8 +9,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @shl_2_add_9_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
%val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
@@ -25,8 +25,8 @@ define amdgpu_kernel void @shl_2_add_9_i32(ptr addrspace(1) %out, ptr addrspace(
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
%val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
@@ -43,8 +43,8 @@ define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(ptr addrspace(1) %out0, pt
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_999_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @shl_2_add_999_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
%val = load i32, ptr addrspace(1) %ptr, align 4
%shl = add i32 %val, 999
@@ -60,7 +60,7 @@ define amdgpu_kernel void @shl_2_add_999_i32(ptr addrspace(1) %out, ptr addrspac
; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) nounwind {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
@@ -76,13 +76,10 @@ define amdgpu_kernel void @test_add_shl_add_constant(ptr addrspace(1) %out, [8 x
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant_inv(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant_inv(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) nounwind {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %y, %shl
store i32 %add.1, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 9127cc3ffb34ee..8e576013cd0c3c 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -7,7 +7,7 @@
; LDS globals.
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@lds0 = addrspace(3) global [512 x float] undef, align 4
@lds1 = addrspace(3) global [512 x float] undef, align 4
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -39,8 +39,8 @@ define amdgpu_kernel void @load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrsp
; GCN-DAG: buffer_store_dword [[RESULT]]
; GCN-DAG: buffer_store_dword [[ADDUSE]]
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_1(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @load_shl_base_lds_1(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -55,8 +55,8 @@ define amdgpu_kernel void @load_shl_base_lds_1(ptr addrspace(1) %out, ptr addrsp
; GCN-LABEL: {{^}}load_shl_base_lds_max_offset
; GCN: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @load_shl_base_lds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 65535
%arrayidx0 = getelementptr inbounds [65536 x i8], ptr addrspace(3) @maxlds, i32 0, i32 %idx.0
%val0 = load i8, ptr addrspace(3) %arrayidx0
@@ -73,8 +73,8 @@ define amdgpu_kernel void @load_shl_base_lds_max_offset(ptr addrspace(1) %out, p
; GCN: s_mov_b32 m0, -1
; GCN: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_2(ptr addrspace(1) %out) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @load_shl_base_lds_2(ptr addrspace(1) %out) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 64
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
@@ -89,8 +89,8 @@ define amdgpu_kernel void @load_shl_base_lds_2(ptr addrspace(1) %out) #0 {
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
store float 1.0, ptr addrspace(3) %arrayidx0, align 4
@@ -104,8 +104,8 @@ define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrs
@lds2 = addrspace(3) global [512 x i32] undef, align 4
-; define amdgpu_kernel void @atomic_load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
-; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+; define amdgpu_kernel void @atomic_load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
; %val = load atomic i32, ptr addrspace(3) %arrayidx0 seq_cst, align 4
@@ -119,8 +119,8 @@ define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrs
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use, i32 %swap) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use, i32 %swap) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%pair = cmpxchg ptr addrspace(3) %arrayidx0, i32 7, i32 %swap seq_cst monotonic
@@ -134,8 +134,8 @@ define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(ptr addrspace(1) %out,
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_swap_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_swap_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw xchg ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -148,8 +148,8 @@ define amdgpu_kernel void @atomic_swap_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_add_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_add_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw add ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -162,8 +162,8 @@ define amdgpu_kernel void @atomic_add_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_sub_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_sub_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw sub ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -176,8 +176,8 @@ define amdgpu_kernel void @atomic_sub_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_and_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_and_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw and ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -190,8 +190,8 @@ define amdgpu_kernel void @atomic_and_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_or_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_or_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw or ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -204,8 +204,8 @@ define amdgpu_kernel void @atomic_or_shl_base_lds_0(ptr addrspace(1) %out, ptr a
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw xor ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -214,8 +214,8 @@ define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr
ret void
}
-; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
-; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
; %val = atomicrmw nand ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -228,8 +228,8 @@ define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_min_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_min_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw min ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -242,8 +242,8 @@ define amdgpu_kernel void @atomic_min_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_max_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_max_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw max ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -256,8 +256,8 @@ define amdgpu_kernel void @atomic_max_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umin_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_umin_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw umin ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -270,8 +270,8 @@ define amdgpu_kernel void @atomic_umin_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw umax ptr addrspace(3) %arrayidx0, i32 3 seq_cst
@@ -284,8 +284,8 @@ define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw uinc_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst
@@ -298,8 +298,8 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
- %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) nounwind {
+ %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%idx.0 = add nsw i32 %tid.x, 2
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
%val = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 31 seq_cst
@@ -313,7 +313,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
; GCN: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:32
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
-define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
+define void @shl_add_ptr_combine_2use_lds(i32 %idx) nounwind {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
@@ -330,7 +330,7 @@ define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
-define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
+define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) nounwind {
%idx.add = add nuw i32 %idx, 8191
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
@@ -347,7 +347,7 @@ define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}}
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+$}}
-define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
+define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) nounwind {
%idx.add = add nuw i32 %idx, 4096
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
@@ -363,7 +363,7 @@ define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 3, v0
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen offset:16
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], 0 offen offset:32
-define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
+define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) nounwind {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 2
@@ -381,7 +381,7 @@ define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen offset:4088
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], 0 offen{{$}}
-define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
+define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) nounwind {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 511
%shl0 = shl i32 %idx.add, 3
@@ -398,7 +398,7 @@ define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], 0 offen{{$}}
-define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.arg) #0 {
+define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.arg) nounwind {
%idx = zext i16 %idx.arg to i32
%idx.add = add nuw i32 %idx, 256
%shl0 = shl i32 %idx.add, 4
@@ -413,7 +413,7 @@ define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.a
; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_lds:
; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8
; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
+define void @shl_or_ptr_combine_2use_lds(i32 %idx) nounwind {
%idx.shl = shl i32 %idx, 1
%idx.add = or i32 %idx.shl, 1
%shl0 = shl i32 %idx.add, 3
@@ -430,7 +430,7 @@ define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, [[OR]]
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}}{{$}}
; GCN-DAG: ds_write_b32 [[SCALE1]], v{{[0-9]+}}{{$}}
-define void @shl_or_ptr_not_combine_2use_lds(i32 %idx) #0 {
+define void @shl_or_ptr_not_combine_2use_lds(i32 %idx) nounwind {
%idx.add = or i32 %idx, 1
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
@@ -440,6 +440,3 @@ define void @shl_or_ptr_not_combine_2use_lds(i32 %idx) #0 {
store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
index c04cb89e9527b6..39e567ac5861d2 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
@@ -7,7 +7,7 @@
; GCN: v_add_co_ci_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc_lo, 0, v5, vcc_lo
; GCN: global_atomic_csub v{{[0-9]+}}, v[[[LO]]:[[HI]]], [[K]], off offset:512 glc
; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) argmemonly nounwind {
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
%cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
@@ -17,6 +17,4 @@ define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr
ret i32 %val
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-
-attributes #0 = { argmemonly nounwind }
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) argmemonly nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
index 39541537b3647a..c83399a20e4455 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
-define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) nounwind {
; GCN-LABEL: shl_base_atomicrmw_global_ptr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -24,7 +24,7 @@ define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(
ret void
}
-define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) nounwind {
; GCN-LABEL: shl_base_global_ptr_global_atomic_fadd:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -45,7 +45,4 @@ define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr a
ret void
}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { argmemonly nounwind willreturn }
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) argmemonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 1a55bf608ebf51..0b4a3c2228d416 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -13,7 +13,7 @@
; Test that add/sub with a constant is swapped to sub/add with negated
; constant to minimize code size.
-define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_x_sub_64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -126,7 +126,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_x_sub_64_multi_use:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -284,7 +284,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_i32_64_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_64_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_64_sub_x:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -397,7 +397,7 @@ define amdgpu_kernel void @v_test_i32_64_sub_x(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_x_sub_65:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -545,7 +545,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_i32_65_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_65_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_65_sub_x:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -658,7 +658,7 @@ define amdgpu_kernel void @v_test_i32_65_sub_x(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_x_sub_neg16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -806,7 +806,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_test_i32_neg16_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_neg16_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_neg16_sub_x:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -919,7 +919,7 @@ define amdgpu_kernel void @v_test_i32_neg16_sub_x(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_x_sub_neg17:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1067,7 +1067,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @v_test_i32_neg17_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i32_neg17_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i32_neg17_sub_x:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1180,7 +1180,7 @@ define amdgpu_kernel void @v_test_i32_neg17_sub_x(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
+define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) nounwind {
; SI-LABEL: s_test_i32_x_sub_64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s0, s[0:1], 0x9
@@ -1235,7 +1235,7 @@ define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
ret void
}
-define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i16_x_sub_64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1348,7 +1348,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
ret void
}
-define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1474,7 +1474,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1632,7 +1632,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_64_64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1760,7 +1760,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_7_64:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1901,7 +1901,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_64_123:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2043,7 +2043,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr
}
; Can fold 0 and inline immediate in other half.
-define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_7_0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2168,7 +2168,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr add
}
; Can fold 0 and inline immediate in other half.
-define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2290,7 +2290,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_1_0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2425,7 +2425,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_neg1_0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2561,7 +2561,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
}
; -32 isn't an inline immediate, but 32 is
-define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2689,7 +2689,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2811,7 +2811,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2936,7 +2936,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr
}
; 16 and -16 are both inline immediates
-define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3064,7 +3064,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3186,7 +3186,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3310,7 +3310,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3475,7 +3475,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3640,7 +3640,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3768,7 +3768,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3896,7 +3896,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4017,7 +4017,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -4171,7 +4171,4 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
index cef959f45437db..ae733b866aacd3 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
@@ -121,14 +121,9 @@ endif2:
}
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare i1 @llvm.amdgcn.wqm.vote(i1) #3
-declare void @llvm.amdgcn.kill(i1) #4
-declare float @llvm.amdgcn.wqm.f32(float) #1
-
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind willreturn memory(argmem: readwrite) }
-attributes #3 = { convergent nounwind readnone willreturn }
-attributes #4 = { nounwind }
+declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) nounwind willreturn memory(argmem: readwrite)
+declare i1 @llvm.amdgcn.wqm.vote(i1) convergent nounwind readnone willreturn
+declare void @llvm.amdgcn.kill(i1) nounwind
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone speculatable willreturn
!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index 2495c0dff89297..a3b56926d807a7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -9,7 +9,7 @@
; GCN: s_cbranch_scc1
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
-define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg) nounwind {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
@@ -42,7 +42,7 @@ bb5: ; preds = %bb3, %bb1
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
-define amdgpu_kernel void @annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg) nounwind {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
@@ -75,7 +75,7 @@ bb5: ; preds = %bb3, %bb1
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end
-define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp) #0 {
+define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp) nounwind {
bb:
br label %bb1
@@ -98,7 +98,4 @@ bb5: ; preds = %bb3, %bb1
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
index 165b996981e34f..0382462418feb7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
@@ -11,7 +11,7 @@
; GCN: s_and_saveexec_b64
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
-define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg, i1 %c0) #0 {
+define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg, i1 %c0) nounwind {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %bb1
@@ -34,7 +34,4 @@ bb5: ; preds = %bb3, %bb1
unreachable
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
index 1ab63762ecbd72..a8e90f646e7fcb 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
@@ -53,7 +53,7 @@ define amdgpu_kernel void @break_inserted_outside_of_loop(ptr addrspace(1) %out,
; FLAT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; FLAT-NEXT: s_endpgm
main_body:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%0 = and i32 %a, %tid
%1 = trunc i32 %0 to i1
br label %ENDIF
@@ -117,7 +117,7 @@ define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; FLAT-NEXT: ; %bb.4: ; %exit
; FLAT-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%0 = icmp eq i32 %tid , 0
br i1 %0, label %if, label %else
@@ -246,6 +246,4 @@ return:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
index a7af02017001fb..cfd54a78510244 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
@@ -93,9 +93,7 @@ exit: ; preds = %loop
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare void @llvm.dbg.value(metadata, metadata, metadata) nocallback nofree nosync nounwind speculatable willreturn memory(none)
!llvm.dbg.cu = !{!0}
!llvm.debugify = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
index 917743bf5d14cb..e8dec89d66fae0 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
@@ -68,6 +68,4 @@ endif:
ret void
}
-declare void @llvm.amdgcn.kill(i1) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.kill(i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 13745d4d5b171d..f6b138d00f578d 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -12,7 +12,7 @@
; GCN-NEXT: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
; GCN: s_endpgm
-define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
+define amdgpu_kernel void @lower_control_flow_unreachable_terminator() nounwind {
bb:
%tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp63 = icmp eq i32 %tmp15, 32
@@ -37,7 +37,7 @@ ret:
; GCN: BB1_{{[0-9]+}}:
; GCN-NEXT: s_endpgm
-define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
+define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() nounwind {
bb:
%tmp15 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp63 = icmp eq i32 %tmp15, 32
@@ -60,7 +60,7 @@ unreachable:
; GCN: [[UNREACHABLE]]:
; GCN: ds_write_b32
-define amdgpu_kernel void @uniform_lower_control_flow_unreachable_terminator(i32 %arg0) #0 {
+define amdgpu_kernel void @uniform_lower_control_flow_unreachable_terminator(i32 %arg0) nounwind {
bb:
%tmp63 = icmp eq i32 %arg0, 32
br i1 %tmp63, label %unreachable, label %ret
@@ -73,8 +73,4 @@ ret:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
index f234ea24a9fe7a..5c7de73b12b243 100644
--- a/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-opt-vgpr-liverange-bug-deadlanes.mir
@@ -6,11 +6,9 @@
# DetectDeadLanes.
--- |
- define dllexport amdgpu_ps void @_amdgpu_ps_main() #0 {
+ define dllexport amdgpu_ps void @_amdgpu_ps_main() "target-cpu"="gfx1100" "target-features"=",+wavefrontsize64,+cumode" "uniform-work-group-size"="false" {
unreachable
}
-
- attributes #0 = { "target-cpu"="gfx1100" "target-features"=",+wavefrontsize64,+cumode" "uniform-work-group-size"="false" }
...
---
name: _amdgpu_ps_main
diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
index d34769ad0fcf0a..e618c11bb3e304 100644
--- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+declare void @llvm.dbg.value(metadata, metadata, metadata) nocallback nofree nosync nounwind speculatable willreturn memory(none)
define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined(i1 %arg) {
; GCN-LABEL: __omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined:
@@ -59,8 +59,6 @@ bb3: ; preds = %bb2, %bb1
ret void
}
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
index 3bdcc14936fb9b..c3fc969ebc57ea 100644
--- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.mir
@@ -3,11 +3,9 @@
# SIOptimizeVGPRLiveRange shouldn't try to modify use of %5 in DBG_VALUE_LIST
--- |
- define void @dbg_instr_use(i1 %arg) #1 {
+ define void @dbg_instr_use(i1 %arg) "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" {
ret void
}
-
- attributes #1 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx908" "uniform-work-group-size"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
diff --git a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
index 516fabce9c1cb9..6ab39f6838f914 100644
--- a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
@@ -17,7 +17,7 @@
; CHECK: s_waitcnt vmcnt(0)
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) nounwind {
main_body:
%tmp22 = load <32 x i8>, ptr addrspace(4) %arg3, align 32, !tbaa !0
%tmp24 = load <16 x i8>, ptr addrspace(4) %arg2, align 16, !tbaa !0
@@ -25,14 +25,14 @@ main_body:
%j.i = extractelement <2 x i32> %arg11, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) #1
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) #1
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg5) nounwind readnone
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg5) nounwind readnone
%i.i1 = extractelement <2 x i32> %arg11, i32 0
%j.i2 = extractelement <2 x i32> %arg11, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) nounwind readnone
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) nounwind readnone
%tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
%tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0)
@@ -43,19 +43,15 @@ main_body:
%tmp35 = extractelement <4 x float> %tmp31, i32 3
%tmp36 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp32, float %tmp33)
%tmp38 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp34, float %tmp35)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp36, <2 x half> %tmp38, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp36, <2 x half> %tmp38, i1 true, i1 false) nounwind
ret void
}
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
@@ -70,7 +66,7 @@ define amdgpu_ps void @_amdgpu_ps_main(i32 %arg) local_unnamed_addr {
%tmp1 = bitcast <2 x i32> %tmp to i64
%tmp2 = inttoptr i64 %tmp1 to ptr addrspace(4)
%tmp3 = load <4 x i32>, ptr addrspace(4) %tmp2, align 16
- %tmp4 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp3, i32 0, i32 0) #0
+ %tmp4 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp3, i32 0, i32 0) nounwind
switch i32 %tmp4, label %bb [
i32 0, label %bb5
i32 1, label %bb6
@@ -87,4 +83,4 @@ bb6: ; preds = %.entry
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #1
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index a14c456952db71..b51b90d0db4ae5 100644
--- a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -108,104 +108,104 @@ main_body:
%j.i = extractelement <2 x i32> %arg6, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg4) #0
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg4) #0
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg4) nounwind
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg4) nounwind
%i.i91 = extractelement <2 x i32> %arg6, i32 0
%j.i92 = extractelement <2 x i32> %arg6, i32 1
%i.f.i93 = bitcast i32 %i.i91 to float
%j.f.i94 = bitcast i32 %j.i92 to float
- %p1.i95 = call float @llvm.amdgcn.interp.p1(float %i.f.i93, i32 1, i32 0, i32 %arg4) #0
- %p2.i96 = call float @llvm.amdgcn.interp.p2(float %p1.i95, float %j.f.i94, i32 1, i32 0, i32 %arg4) #0
+ %p1.i95 = call float @llvm.amdgcn.interp.p1(float %i.f.i93, i32 1, i32 0, i32 %arg4) nounwind
+ %p2.i96 = call float @llvm.amdgcn.interp.p2(float %p1.i95, float %j.f.i94, i32 1, i32 0, i32 %arg4) nounwind
%i.i85 = extractelement <2 x i32> %arg6, i32 0
%j.i86 = extractelement <2 x i32> %arg6, i32 1
%i.f.i87 = bitcast i32 %i.i85 to float
%j.f.i88 = bitcast i32 %j.i86 to float
- %p1.i89 = call float @llvm.amdgcn.interp.p1(float %i.f.i87, i32 0, i32 1, i32 %arg4) #0
- %p2.i90 = call float @llvm.amdgcn.interp.p2(float %p1.i89, float %j.f.i88, i32 0, i32 1, i32 %arg4) #0
+ %p1.i89 = call float @llvm.amdgcn.interp.p1(float %i.f.i87, i32 0, i32 1, i32 %arg4) nounwind
+ %p2.i90 = call float @llvm.amdgcn.interp.p2(float %p1.i89, float %j.f.i88, i32 0, i32 1, i32 %arg4) nounwind
%i.i79 = extractelement <2 x i32> %arg6, i32 0
%j.i80 = extractelement <2 x i32> %arg6, i32 1
%i.f.i81 = bitcast i32 %i.i79 to float
%j.f.i82 = bitcast i32 %j.i80 to float
- %p1.i83 = call float @llvm.amdgcn.interp.p1(float %i.f.i81, i32 1, i32 1, i32 %arg4) #0
- %p2.i84 = call float @llvm.amdgcn.interp.p2(float %p1.i83, float %j.f.i82, i32 1, i32 1, i32 %arg4) #0
+ %p1.i83 = call float @llvm.amdgcn.interp.p1(float %i.f.i81, i32 1, i32 1, i32 %arg4) nounwind
+ %p2.i84 = call float @llvm.amdgcn.interp.p2(float %p1.i83, float %j.f.i82, i32 1, i32 1, i32 %arg4) nounwind
%i.i73 = extractelement <2 x i32> %arg6, i32 0
%j.i74 = extractelement <2 x i32> %arg6, i32 1
%i.f.i75 = bitcast i32 %i.i73 to float
%j.f.i76 = bitcast i32 %j.i74 to float
- %p1.i77 = call float @llvm.amdgcn.interp.p1(float %i.f.i75, i32 2, i32 1, i32 %arg4) #0
- %p2.i78 = call float @llvm.amdgcn.interp.p2(float %p1.i77, float %j.f.i76, i32 2, i32 1, i32 %arg4) #0
+ %p1.i77 = call float @llvm.amdgcn.interp.p1(float %i.f.i75, i32 2, i32 1, i32 %arg4) nounwind
+ %p2.i78 = call float @llvm.amdgcn.interp.p2(float %p1.i77, float %j.f.i76, i32 2, i32 1, i32 %arg4) nounwind
%i.i67 = extractelement <2 x i32> %arg6, i32 0
%j.i68 = extractelement <2 x i32> %arg6, i32 1
%i.f.i69 = bitcast i32 %i.i67 to float
%j.f.i70 = bitcast i32 %j.i68 to float
- %p1.i71 = call float @llvm.amdgcn.interp.p1(float %i.f.i69, i32 0, i32 2, i32 %arg4) #0
- %p2.i72 = call float @llvm.amdgcn.interp.p2(float %p1.i71, float %j.f.i70, i32 0, i32 2, i32 %arg4) #0
+ %p1.i71 = call float @llvm.amdgcn.interp.p1(float %i.f.i69, i32 0, i32 2, i32 %arg4) nounwind
+ %p2.i72 = call float @llvm.amdgcn.interp.p2(float %p1.i71, float %j.f.i70, i32 0, i32 2, i32 %arg4) nounwind
%i.i61 = extractelement <2 x i32> %arg6, i32 0
%j.i62 = extractelement <2 x i32> %arg6, i32 1
%i.f.i63 = bitcast i32 %i.i61 to float
%j.f.i64 = bitcast i32 %j.i62 to float
- %p1.i65 = call float @llvm.amdgcn.interp.p1(float %i.f.i63, i32 1, i32 2, i32 %arg4) #0
- %p2.i66 = call float @llvm.amdgcn.interp.p2(float %p1.i65, float %j.f.i64, i32 1, i32 2, i32 %arg4) #0
+ %p1.i65 = call float @llvm.amdgcn.interp.p1(float %i.f.i63, i32 1, i32 2, i32 %arg4) nounwind
+ %p2.i66 = call float @llvm.amdgcn.interp.p2(float %p1.i65, float %j.f.i64, i32 1, i32 2, i32 %arg4) nounwind
%i.i55 = extractelement <2 x i32> %arg6, i32 0
%j.i56 = extractelement <2 x i32> %arg6, i32 1
%i.f.i57 = bitcast i32 %i.i55 to float
%j.f.i58 = bitcast i32 %j.i56 to float
- %p1.i59 = call float @llvm.amdgcn.interp.p1(float %i.f.i57, i32 2, i32 2, i32 %arg4) #0
- %p2.i60 = call float @llvm.amdgcn.interp.p2(float %p1.i59, float %j.f.i58, i32 2, i32 2, i32 %arg4) #0
+ %p1.i59 = call float @llvm.amdgcn.interp.p1(float %i.f.i57, i32 2, i32 2, i32 %arg4) nounwind
+ %p2.i60 = call float @llvm.amdgcn.interp.p2(float %p1.i59, float %j.f.i58, i32 2, i32 2, i32 %arg4) nounwind
%i.i49 = extractelement <2 x i32> %arg6, i32 0
%j.i50 = extractelement <2 x i32> %arg6, i32 1
%i.f.i51 = bitcast i32 %i.i49 to float
%j.f.i52 = bitcast i32 %j.i50 to float
- %p1.i53 = call float @llvm.amdgcn.interp.p1(float %i.f.i51, i32 0, i32 3, i32 %arg4) #0
- %p2.i54 = call float @llvm.amdgcn.interp.p2(float %p1.i53, float %j.f.i52, i32 0, i32 3, i32 %arg4) #0
+ %p1.i53 = call float @llvm.amdgcn.interp.p1(float %i.f.i51, i32 0, i32 3, i32 %arg4) nounwind
+ %p2.i54 = call float @llvm.amdgcn.interp.p2(float %p1.i53, float %j.f.i52, i32 0, i32 3, i32 %arg4) nounwind
%i.i43 = extractelement <2 x i32> %arg6, i32 0
%j.i44 = extractelement <2 x i32> %arg6, i32 1
%i.f.i45 = bitcast i32 %i.i43 to float
%j.f.i46 = bitcast i32 %j.i44 to float
- %p1.i47 = call float @llvm.amdgcn.interp.p1(float %i.f.i45, i32 1, i32 3, i32 %arg4) #0
- %p2.i48 = call float @llvm.amdgcn.interp.p2(float %p1.i47, float %j.f.i46, i32 1, i32 3, i32 %arg4) #0
+ %p1.i47 = call float @llvm.amdgcn.interp.p1(float %i.f.i45, i32 1, i32 3, i32 %arg4) nounwind
+ %p2.i48 = call float @llvm.amdgcn.interp.p2(float %p1.i47, float %j.f.i46, i32 1, i32 3, i32 %arg4) nounwind
%i.i37 = extractelement <2 x i32> %arg6, i32 0
%j.i38 = extractelement <2 x i32> %arg6, i32 1
%i.f.i39 = bitcast i32 %i.i37 to float
%j.f.i40 = bitcast i32 %j.i38 to float
- %p1.i41 = call float @llvm.amdgcn.interp.p1(float %i.f.i39, i32 2, i32 3, i32 %arg4) #0
- %p2.i42 = call float @llvm.amdgcn.interp.p2(float %p1.i41, float %j.f.i40, i32 2, i32 3, i32 %arg4) #0
+ %p1.i41 = call float @llvm.amdgcn.interp.p1(float %i.f.i39, i32 2, i32 3, i32 %arg4) nounwind
+ %p2.i42 = call float @llvm.amdgcn.interp.p2(float %p1.i41, float %j.f.i40, i32 2, i32 3, i32 %arg4) nounwind
%i.i31 = extractelement <2 x i32> %arg6, i32 0
%j.i32 = extractelement <2 x i32> %arg6, i32 1
%i.f.i33 = bitcast i32 %i.i31 to float
%j.f.i34 = bitcast i32 %j.i32 to float
- %p1.i35 = call float @llvm.amdgcn.interp.p1(float %i.f.i33, i32 0, i32 4, i32 %arg4) #0
- %p2.i36 = call float @llvm.amdgcn.interp.p2(float %p1.i35, float %j.f.i34, i32 0, i32 4, i32 %arg4) #0
+ %p1.i35 = call float @llvm.amdgcn.interp.p1(float %i.f.i33, i32 0, i32 4, i32 %arg4) nounwind
+ %p2.i36 = call float @llvm.amdgcn.interp.p2(float %p1.i35, float %j.f.i34, i32 0, i32 4, i32 %arg4) nounwind
%i.i25 = extractelement <2 x i32> %arg6, i32 0
%j.i26 = extractelement <2 x i32> %arg6, i32 1
%i.f.i27 = bitcast i32 %i.i25 to float
%j.f.i28 = bitcast i32 %j.i26 to float
- %p1.i29 = call float @llvm.amdgcn.interp.p1(float %i.f.i27, i32 1, i32 4, i32 %arg4) #0
- %p2.i30 = call float @llvm.amdgcn.interp.p2(float %p1.i29, float %j.f.i28, i32 1, i32 4, i32 %arg4) #0
+ %p1.i29 = call float @llvm.amdgcn.interp.p1(float %i.f.i27, i32 1, i32 4, i32 %arg4) nounwind
+ %p2.i30 = call float @llvm.amdgcn.interp.p2(float %p1.i29, float %j.f.i28, i32 1, i32 4, i32 %arg4) nounwind
%i.i19 = extractelement <2 x i32> %arg6, i32 0
%j.i20 = extractelement <2 x i32> %arg6, i32 1
%i.f.i21 = bitcast i32 %i.i19 to float
%j.f.i22 = bitcast i32 %j.i20 to float
- %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 2, i32 4, i32 %arg4) #0
- %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 2, i32 4, i32 %arg4) #0
+ %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 2, i32 4, i32 %arg4) nounwind
+ %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 2, i32 4, i32 %arg4) nounwind
%i.i13 = extractelement <2 x i32> %arg6, i32 0
%j.i14 = extractelement <2 x i32> %arg6, i32 1
%i.f.i15 = bitcast i32 %i.i13 to float
%j.f.i16 = bitcast i32 %j.i14 to float
- %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 5, i32 %arg4) #0
- %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 5, i32 %arg4) #0
+ %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 0, i32 5, i32 %arg4) nounwind
+ %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 0, i32 5, i32 %arg4) nounwind
%i.i7 = extractelement <2 x i32> %arg6, i32 0
%j.i8 = extractelement <2 x i32> %arg6, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 5, i32 %arg4) #0
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 5, i32 %arg4) #0
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 1, i32 5, i32 %arg4) nounwind
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 1, i32 5, i32 %arg4) nounwind
%i.i1 = extractelement <2 x i32> %arg6, i32 0
%j.i2 = extractelement <2 x i32> %arg6, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 5, i32 %arg4) #0
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 5, i32 %arg4) #0
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 5, i32 %arg4) nounwind
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 5, i32 %arg4) nounwind
%mbcnt.lo.0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%tmp109 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.0)
%tmp110 = getelementptr [64 x i32], ptr addrspace(3) %lds, i32 0, i32 %tmp109
@@ -518,7 +518,7 @@ IF67: ; preds = %LOOP65
%tmp461 = fmul float %tmp454, %tmp458
%tmp462 = fadd float %tmp461, 1.500000e+00
%tmp91.bc = bitcast <4 x i32> %tmp91 to <4 x i32>
- %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) #0
+ %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %tmp462, float %tmp460, float %tmp456, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i1 0, i32 0, i32 0) nounwind
%tmp471 = extractelement <4 x float> %tmp470, i32 0
%tmp472 = extractelement <4 x float> %tmp470, i32 1
%tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -618,7 +618,7 @@ IF67: ; preds = %LOOP65
%tmp580 = fadd float %tmp579, %tmp556
%tmp581 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp576, float %tmp578)
%tmp583 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp580, float %tmp282)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp581, <2 x half> %tmp583, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp581, <2 x half> %tmp583, i1 true, i1 true) nounwind
ret void
ENDIF66: ; preds = %LOOP65
@@ -648,7 +648,7 @@ ENDIF66: ; preds = %LOOP65
; GCN-LABEL: {{^}}main1:
; GCN: s_endpgm
; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @main1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) nounwind {
main_body:
%tmp21 = load <4 x i32>, ptr addrspace(4) %arg, !tbaa !0
%tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp21, i32 0, i32 0)
@@ -794,194 +794,194 @@ main_body:
%j.i = extractelement <2 x i32> %arg6, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg4) #0
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg4) #0
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 0, i32 0, i32 %arg4) nounwind
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 0, i32 0, i32 %arg4) nounwind
%i.i181 = extractelement <2 x i32> %arg6, i32 0
%j.i182 = extractelement <2 x i32> %arg6, i32 1
%i.f.i183 = bitcast i32 %i.i181 to float
%j.f.i184 = bitcast i32 %j.i182 to float
- %p1.i185 = call float @llvm.amdgcn.interp.p1(float %i.f.i183, i32 1, i32 0, i32 %arg4) #0
- %p2.i186 = call float @llvm.amdgcn.interp.p2(float %p1.i185, float %j.f.i184, i32 1, i32 0, i32 %arg4) #0
+ %p1.i185 = call float @llvm.amdgcn.interp.p1(float %i.f.i183, i32 1, i32 0, i32 %arg4) nounwind
+ %p2.i186 = call float @llvm.amdgcn.interp.p2(float %p1.i185, float %j.f.i184, i32 1, i32 0, i32 %arg4) nounwind
%i.i175 = extractelement <2 x i32> %arg6, i32 0
%j.i176 = extractelement <2 x i32> %arg6, i32 1
%i.f.i177 = bitcast i32 %i.i175 to float
%j.f.i178 = bitcast i32 %j.i176 to float
- %p1.i179 = call float @llvm.amdgcn.interp.p1(float %i.f.i177, i32 2, i32 0, i32 %arg4) #0
- %p2.i180 = call float @llvm.amdgcn.interp.p2(float %p1.i179, float %j.f.i178, i32 2, i32 0, i32 %arg4) #0
+ %p1.i179 = call float @llvm.amdgcn.interp.p1(float %i.f.i177, i32 2, i32 0, i32 %arg4) nounwind
+ %p2.i180 = call float @llvm.amdgcn.interp.p2(float %p1.i179, float %j.f.i178, i32 2, i32 0, i32 %arg4) nounwind
%i.i169 = extractelement <2 x i32> %arg6, i32 0
%j.i170 = extractelement <2 x i32> %arg6, i32 1
%i.f.i171 = bitcast i32 %i.i169 to float
%j.f.i172 = bitcast i32 %j.i170 to float
- %p1.i173 = call float @llvm.amdgcn.interp.p1(float %i.f.i171, i32 3, i32 0, i32 %arg4) #0
- %p2.i174 = call float @llvm.amdgcn.interp.p2(float %p1.i173, float %j.f.i172, i32 3, i32 0, i32 %arg4) #0
+ %p1.i173 = call float @llvm.amdgcn.interp.p1(float %i.f.i171, i32 3, i32 0, i32 %arg4) nounwind
+ %p2.i174 = call float @llvm.amdgcn.interp.p2(float %p1.i173, float %j.f.i172, i32 3, i32 0, i32 %arg4) nounwind
%i.i163 = extractelement <2 x i32> %arg6, i32 0
%j.i164 = extractelement <2 x i32> %arg6, i32 1
%i.f.i165 = bitcast i32 %i.i163 to float
%j.f.i166 = bitcast i32 %j.i164 to float
- %p1.i167 = call float @llvm.amdgcn.interp.p1(float %i.f.i165, i32 0, i32 1, i32 %arg4) #0
- %p2.i168 = call float @llvm.amdgcn.interp.p2(float %p1.i167, float %j.f.i166, i32 0, i32 1, i32 %arg4) #0
+ %p1.i167 = call float @llvm.amdgcn.interp.p1(float %i.f.i165, i32 0, i32 1, i32 %arg4) nounwind
+ %p2.i168 = call float @llvm.amdgcn.interp.p2(float %p1.i167, float %j.f.i166, i32 0, i32 1, i32 %arg4) nounwind
%i.i157 = extractelement <2 x i32> %arg6, i32 0
%j.i158 = extractelement <2 x i32> %arg6, i32 1
%i.f.i159 = bitcast i32 %i.i157 to float
%j.f.i160 = bitcast i32 %j.i158 to float
- %p1.i161 = call float @llvm.amdgcn.interp.p1(float %i.f.i159, i32 1, i32 1, i32 %arg4) #0
- %p2.i162 = call float @llvm.amdgcn.interp.p2(float %p1.i161, float %j.f.i160, i32 1, i32 1, i32 %arg4) #0
+ %p1.i161 = call float @llvm.amdgcn.interp.p1(float %i.f.i159, i32 1, i32 1, i32 %arg4) nounwind
+ %p2.i162 = call float @llvm.amdgcn.interp.p2(float %p1.i161, float %j.f.i160, i32 1, i32 1, i32 %arg4) nounwind
%i.i151 = extractelement <2 x i32> %arg6, i32 0
%j.i152 = extractelement <2 x i32> %arg6, i32 1
%i.f.i153 = bitcast i32 %i.i151 to float
%j.f.i154 = bitcast i32 %j.i152 to float
- %p1.i155 = call float @llvm.amdgcn.interp.p1(float %i.f.i153, i32 2, i32 1, i32 %arg4) #0
- %p2.i156 = call float @llvm.amdgcn.interp.p2(float %p1.i155, float %j.f.i154, i32 2, i32 1, i32 %arg4) #0
+ %p1.i155 = call float @llvm.amdgcn.interp.p1(float %i.f.i153, i32 2, i32 1, i32 %arg4) nounwind
+ %p2.i156 = call float @llvm.amdgcn.interp.p2(float %p1.i155, float %j.f.i154, i32 2, i32 1, i32 %arg4) nounwind
%i.i145 = extractelement <2 x i32> %arg6, i32 0
%j.i146 = extractelement <2 x i32> %arg6, i32 1
%i.f.i147 = bitcast i32 %i.i145 to float
%j.f.i148 = bitcast i32 %j.i146 to float
- %p1.i149 = call float @llvm.amdgcn.interp.p1(float %i.f.i147, i32 3, i32 1, i32 %arg4) #0
- %p2.i150 = call float @llvm.amdgcn.interp.p2(float %p1.i149, float %j.f.i148, i32 3, i32 1, i32 %arg4) #0
+ %p1.i149 = call float @llvm.amdgcn.interp.p1(float %i.f.i147, i32 3, i32 1, i32 %arg4) nounwind
+ %p2.i150 = call float @llvm.amdgcn.interp.p2(float %p1.i149, float %j.f.i148, i32 3, i32 1, i32 %arg4) nounwind
%i.i139 = extractelement <2 x i32> %arg6, i32 0
%j.i140 = extractelement <2 x i32> %arg6, i32 1
%i.f.i141 = bitcast i32 %i.i139 to float
%j.f.i142 = bitcast i32 %j.i140 to float
- %p1.i143 = call float @llvm.amdgcn.interp.p1(float %i.f.i141, i32 0, i32 2, i32 %arg4) #0
- %p2.i144 = call float @llvm.amdgcn.interp.p2(float %p1.i143, float %j.f.i142, i32 0, i32 2, i32 %arg4) #0
+ %p1.i143 = call float @llvm.amdgcn.interp.p1(float %i.f.i141, i32 0, i32 2, i32 %arg4) nounwind
+ %p2.i144 = call float @llvm.amdgcn.interp.p2(float %p1.i143, float %j.f.i142, i32 0, i32 2, i32 %arg4) nounwind
%i.i133 = extractelement <2 x i32> %arg6, i32 0
%j.i134 = extractelement <2 x i32> %arg6, i32 1
%i.f.i135 = bitcast i32 %i.i133 to float
%j.f.i136 = bitcast i32 %j.i134 to float
- %p1.i137 = call float @llvm.amdgcn.interp.p1(float %i.f.i135, i32 1, i32 2, i32 %arg4) #0
- %p2.i138 = call float @llvm.amdgcn.interp.p2(float %p1.i137, float %j.f.i136, i32 1, i32 2, i32 %arg4) #0
+ %p1.i137 = call float @llvm.amdgcn.interp.p1(float %i.f.i135, i32 1, i32 2, i32 %arg4) nounwind
+ %p2.i138 = call float @llvm.amdgcn.interp.p2(float %p1.i137, float %j.f.i136, i32 1, i32 2, i32 %arg4) nounwind
%i.i127 = extractelement <2 x i32> %arg6, i32 0
%j.i128 = extractelement <2 x i32> %arg6, i32 1
%i.f.i129 = bitcast i32 %i.i127 to float
%j.f.i130 = bitcast i32 %j.i128 to float
- %p1.i131 = call float @llvm.amdgcn.interp.p1(float %i.f.i129, i32 2, i32 2, i32 %arg4) #0
- %p2.i132 = call float @llvm.amdgcn.interp.p2(float %p1.i131, float %j.f.i130, i32 2, i32 2, i32 %arg4) #0
+ %p1.i131 = call float @llvm.amdgcn.interp.p1(float %i.f.i129, i32 2, i32 2, i32 %arg4) nounwind
+ %p2.i132 = call float @llvm.amdgcn.interp.p2(float %p1.i131, float %j.f.i130, i32 2, i32 2, i32 %arg4) nounwind
%i.i121 = extractelement <2 x i32> %arg6, i32 0
%j.i122 = extractelement <2 x i32> %arg6, i32 1
%i.f.i123 = bitcast i32 %i.i121 to float
%j.f.i124 = bitcast i32 %j.i122 to float
- %p1.i125 = call float @llvm.amdgcn.interp.p1(float %i.f.i123, i32 3, i32 2, i32 %arg4) #0
- %p2.i126 = call float @llvm.amdgcn.interp.p2(float %p1.i125, float %j.f.i124, i32 3, i32 2, i32 %arg4) #0
+ %p1.i125 = call float @llvm.amdgcn.interp.p1(float %i.f.i123, i32 3, i32 2, i32 %arg4) nounwind
+ %p2.i126 = call float @llvm.amdgcn.interp.p2(float %p1.i125, float %j.f.i124, i32 3, i32 2, i32 %arg4) nounwind
%i.i115 = extractelement <2 x i32> %arg6, i32 0
%j.i116 = extractelement <2 x i32> %arg6, i32 1
%i.f.i117 = bitcast i32 %i.i115 to float
%j.f.i118 = bitcast i32 %j.i116 to float
- %p1.i119 = call float @llvm.amdgcn.interp.p1(float %i.f.i117, i32 0, i32 3, i32 %arg4) #0
- %p2.i120 = call float @llvm.amdgcn.interp.p2(float %p1.i119, float %j.f.i118, i32 0, i32 3, i32 %arg4) #0
+ %p1.i119 = call float @llvm.amdgcn.interp.p1(float %i.f.i117, i32 0, i32 3, i32 %arg4) nounwind
+ %p2.i120 = call float @llvm.amdgcn.interp.p2(float %p1.i119, float %j.f.i118, i32 0, i32 3, i32 %arg4) nounwind
%i.i109 = extractelement <2 x i32> %arg6, i32 0
%j.i110 = extractelement <2 x i32> %arg6, i32 1
%i.f.i111 = bitcast i32 %i.i109 to float
%j.f.i112 = bitcast i32 %j.i110 to float
- %p1.i113 = call float @llvm.amdgcn.interp.p1(float %i.f.i111, i32 1, i32 3, i32 %arg4) #0
- %p2.i114 = call float @llvm.amdgcn.interp.p2(float %p1.i113, float %j.f.i112, i32 1, i32 3, i32 %arg4) #0
+ %p1.i113 = call float @llvm.amdgcn.interp.p1(float %i.f.i111, i32 1, i32 3, i32 %arg4) nounwind
+ %p2.i114 = call float @llvm.amdgcn.interp.p2(float %p1.i113, float %j.f.i112, i32 1, i32 3, i32 %arg4) nounwind
%i.i103 = extractelement <2 x i32> %arg6, i32 0
%j.i104 = extractelement <2 x i32> %arg6, i32 1
%i.f.i105 = bitcast i32 %i.i103 to float
%j.f.i106 = bitcast i32 %j.i104 to float
- %p1.i107 = call float @llvm.amdgcn.interp.p1(float %i.f.i105, i32 2, i32 3, i32 %arg4) #0
- %p2.i108 = call float @llvm.amdgcn.interp.p2(float %p1.i107, float %j.f.i106, i32 2, i32 3, i32 %arg4) #0
+ %p1.i107 = call float @llvm.amdgcn.interp.p1(float %i.f.i105, i32 2, i32 3, i32 %arg4) nounwind
+ %p2.i108 = call float @llvm.amdgcn.interp.p2(float %p1.i107, float %j.f.i106, i32 2, i32 3, i32 %arg4) nounwind
%i.i97 = extractelement <2 x i32> %arg6, i32 0
%j.i98 = extractelement <2 x i32> %arg6, i32 1
%i.f.i99 = bitcast i32 %i.i97 to float
%j.f.i100 = bitcast i32 %j.i98 to float
- %p1.i101 = call float @llvm.amdgcn.interp.p1(float %i.f.i99, i32 3, i32 3, i32 %arg4) #0
- %p2.i102 = call float @llvm.amdgcn.interp.p2(float %p1.i101, float %j.f.i100, i32 3, i32 3, i32 %arg4) #0
+ %p1.i101 = call float @llvm.amdgcn.interp.p1(float %i.f.i99, i32 3, i32 3, i32 %arg4) nounwind
+ %p2.i102 = call float @llvm.amdgcn.interp.p2(float %p1.i101, float %j.f.i100, i32 3, i32 3, i32 %arg4) nounwind
%i.i91 = extractelement <2 x i32> %arg6, i32 0
%j.i92 = extractelement <2 x i32> %arg6, i32 1
%i.f.i93 = bitcast i32 %i.i91 to float
%j.f.i94 = bitcast i32 %j.i92 to float
- %p1.i95 = call float @llvm.amdgcn.interp.p1(float %i.f.i93, i32 0, i32 4, i32 %arg4) #0
- %p2.i96 = call float @llvm.amdgcn.interp.p2(float %p1.i95, float %j.f.i94, i32 0, i32 4, i32 %arg4) #0
+ %p1.i95 = call float @llvm.amdgcn.interp.p1(float %i.f.i93, i32 0, i32 4, i32 %arg4) nounwind
+ %p2.i96 = call float @llvm.amdgcn.interp.p2(float %p1.i95, float %j.f.i94, i32 0, i32 4, i32 %arg4) nounwind
%i.i85 = extractelement <2 x i32> %arg6, i32 0
%j.i86 = extractelement <2 x i32> %arg6, i32 1
%i.f.i87 = bitcast i32 %i.i85 to float
%j.f.i88 = bitcast i32 %j.i86 to float
- %p1.i89 = call float @llvm.amdgcn.interp.p1(float %i.f.i87, i32 1, i32 4, i32 %arg4) #0
- %p2.i90 = call float @llvm.amdgcn.interp.p2(float %p1.i89, float %j.f.i88, i32 1, i32 4, i32 %arg4) #0
+ %p1.i89 = call float @llvm.amdgcn.interp.p1(float %i.f.i87, i32 1, i32 4, i32 %arg4) nounwind
+ %p2.i90 = call float @llvm.amdgcn.interp.p2(float %p1.i89, float %j.f.i88, i32 1, i32 4, i32 %arg4) nounwind
%i.i79 = extractelement <2 x i32> %arg6, i32 0
%j.i80 = extractelement <2 x i32> %arg6, i32 1
%i.f.i81 = bitcast i32 %i.i79 to float
%j.f.i82 = bitcast i32 %j.i80 to float
- %p1.i83 = call float @llvm.amdgcn.interp.p1(float %i.f.i81, i32 2, i32 4, i32 %arg4) #0
- %p2.i84 = call float @llvm.amdgcn.interp.p2(float %p1.i83, float %j.f.i82, i32 2, i32 4, i32 %arg4) #0
+ %p1.i83 = call float @llvm.amdgcn.interp.p1(float %i.f.i81, i32 2, i32 4, i32 %arg4) nounwind
+ %p2.i84 = call float @llvm.amdgcn.interp.p2(float %p1.i83, float %j.f.i82, i32 2, i32 4, i32 %arg4) nounwind
%i.i73 = extractelement <2 x i32> %arg6, i32 0
%j.i74 = extractelement <2 x i32> %arg6, i32 1
%i.f.i75 = bitcast i32 %i.i73 to float
%j.f.i76 = bitcast i32 %j.i74 to float
- %p1.i77 = call float @llvm.amdgcn.interp.p1(float %i.f.i75, i32 3, i32 4, i32 %arg4) #0
- %p2.i78 = call float @llvm.amdgcn.interp.p2(float %p1.i77, float %j.f.i76, i32 3, i32 4, i32 %arg4) #0
+ %p1.i77 = call float @llvm.amdgcn.interp.p1(float %i.f.i75, i32 3, i32 4, i32 %arg4) nounwind
+ %p2.i78 = call float @llvm.amdgcn.interp.p2(float %p1.i77, float %j.f.i76, i32 3, i32 4, i32 %arg4) nounwind
%i.i67 = extractelement <2 x i32> %arg6, i32 0
%j.i68 = extractelement <2 x i32> %arg6, i32 1
%i.f.i69 = bitcast i32 %i.i67 to float
%j.f.i70 = bitcast i32 %j.i68 to float
- %p1.i71 = call float @llvm.amdgcn.interp.p1(float %i.f.i69, i32 0, i32 5, i32 %arg4) #0
- %p2.i72 = call float @llvm.amdgcn.interp.p2(float %p1.i71, float %j.f.i70, i32 0, i32 5, i32 %arg4) #0
+ %p1.i71 = call float @llvm.amdgcn.interp.p1(float %i.f.i69, i32 0, i32 5, i32 %arg4) nounwind
+ %p2.i72 = call float @llvm.amdgcn.interp.p2(float %p1.i71, float %j.f.i70, i32 0, i32 5, i32 %arg4) nounwind
%i.i61 = extractelement <2 x i32> %arg6, i32 0
%j.i62 = extractelement <2 x i32> %arg6, i32 1
%i.f.i63 = bitcast i32 %i.i61 to float
%j.f.i64 = bitcast i32 %j.i62 to float
- %p1.i65 = call float @llvm.amdgcn.interp.p1(float %i.f.i63, i32 1, i32 5, i32 %arg4) #0
- %p2.i66 = call float @llvm.amdgcn.interp.p2(float %p1.i65, float %j.f.i64, i32 1, i32 5, i32 %arg4) #0
+ %p1.i65 = call float @llvm.amdgcn.interp.p1(float %i.f.i63, i32 1, i32 5, i32 %arg4) nounwind
+ %p2.i66 = call float @llvm.amdgcn.interp.p2(float %p1.i65, float %j.f.i64, i32 1, i32 5, i32 %arg4) nounwind
%i.i55 = extractelement <2 x i32> %arg6, i32 0
%j.i56 = extractelement <2 x i32> %arg6, i32 1
%i.f.i57 = bitcast i32 %i.i55 to float
%j.f.i58 = bitcast i32 %j.i56 to float
- %p1.i59 = call float @llvm.amdgcn.interp.p1(float %i.f.i57, i32 2, i32 5, i32 %arg4) #0
- %p2.i60 = call float @llvm.amdgcn.interp.p2(float %p1.i59, float %j.f.i58, i32 2, i32 5, i32 %arg4) #0
+ %p1.i59 = call float @llvm.amdgcn.interp.p1(float %i.f.i57, i32 2, i32 5, i32 %arg4) nounwind
+ %p2.i60 = call float @llvm.amdgcn.interp.p2(float %p1.i59, float %j.f.i58, i32 2, i32 5, i32 %arg4) nounwind
%i.i49 = extractelement <2 x i32> %arg6, i32 0
%j.i50 = extractelement <2 x i32> %arg6, i32 1
%i.f.i51 = bitcast i32 %i.i49 to float
%j.f.i52 = bitcast i32 %j.i50 to float
- %p1.i53 = call float @llvm.amdgcn.interp.p1(float %i.f.i51, i32 3, i32 5, i32 %arg4) #0
- %p2.i54 = call float @llvm.amdgcn.interp.p2(float %p1.i53, float %j.f.i52, i32 3, i32 5, i32 %arg4) #0
+ %p1.i53 = call float @llvm.amdgcn.interp.p1(float %i.f.i51, i32 3, i32 5, i32 %arg4) nounwind
+ %p2.i54 = call float @llvm.amdgcn.interp.p2(float %p1.i53, float %j.f.i52, i32 3, i32 5, i32 %arg4) nounwind
%i.i43 = extractelement <2 x i32> %arg6, i32 0
%j.i44 = extractelement <2 x i32> %arg6, i32 1
%i.f.i45 = bitcast i32 %i.i43 to float
%j.f.i46 = bitcast i32 %j.i44 to float
- %p1.i47 = call float @llvm.amdgcn.interp.p1(float %i.f.i45, i32 0, i32 6, i32 %arg4) #0
- %p2.i48 = call float @llvm.amdgcn.interp.p2(float %p1.i47, float %j.f.i46, i32 0, i32 6, i32 %arg4) #0
+ %p1.i47 = call float @llvm.amdgcn.interp.p1(float %i.f.i45, i32 0, i32 6, i32 %arg4) nounwind
+ %p2.i48 = call float @llvm.amdgcn.interp.p2(float %p1.i47, float %j.f.i46, i32 0, i32 6, i32 %arg4) nounwind
%i.i37 = extractelement <2 x i32> %arg6, i32 0
%j.i38 = extractelement <2 x i32> %arg6, i32 1
%i.f.i39 = bitcast i32 %i.i37 to float
%j.f.i40 = bitcast i32 %j.i38 to float
- %p1.i41 = call float @llvm.amdgcn.interp.p1(float %i.f.i39, i32 1, i32 6, i32 %arg4) #0
- %p2.i42 = call float @llvm.amdgcn.interp.p2(float %p1.i41, float %j.f.i40, i32 1, i32 6, i32 %arg4) #0
+ %p1.i41 = call float @llvm.amdgcn.interp.p1(float %i.f.i39, i32 1, i32 6, i32 %arg4) nounwind
+ %p2.i42 = call float @llvm.amdgcn.interp.p2(float %p1.i41, float %j.f.i40, i32 1, i32 6, i32 %arg4) nounwind
%i.i31 = extractelement <2 x i32> %arg6, i32 0
%j.i32 = extractelement <2 x i32> %arg6, i32 1
%i.f.i33 = bitcast i32 %i.i31 to float
%j.f.i34 = bitcast i32 %j.i32 to float
- %p1.i35 = call float @llvm.amdgcn.interp.p1(float %i.f.i33, i32 2, i32 6, i32 %arg4) #0
- %p2.i36 = call float @llvm.amdgcn.interp.p2(float %p1.i35, float %j.f.i34, i32 2, i32 6, i32 %arg4) #0
+ %p1.i35 = call float @llvm.amdgcn.interp.p1(float %i.f.i33, i32 2, i32 6, i32 %arg4) nounwind
+ %p2.i36 = call float @llvm.amdgcn.interp.p2(float %p1.i35, float %j.f.i34, i32 2, i32 6, i32 %arg4) nounwind
%i.i25 = extractelement <2 x i32> %arg6, i32 0
%j.i26 = extractelement <2 x i32> %arg6, i32 1
%i.f.i27 = bitcast i32 %i.i25 to float
%j.f.i28 = bitcast i32 %j.i26 to float
- %p1.i29 = call float @llvm.amdgcn.interp.p1(float %i.f.i27, i32 3, i32 6, i32 %arg4) #0
- %p2.i30 = call float @llvm.amdgcn.interp.p2(float %p1.i29, float %j.f.i28, i32 3, i32 6, i32 %arg4) #0
+ %p1.i29 = call float @llvm.amdgcn.interp.p1(float %i.f.i27, i32 3, i32 6, i32 %arg4) nounwind
+ %p2.i30 = call float @llvm.amdgcn.interp.p2(float %p1.i29, float %j.f.i28, i32 3, i32 6, i32 %arg4) nounwind
%i.i19 = extractelement <2 x i32> %arg6, i32 0
%j.i20 = extractelement <2 x i32> %arg6, i32 1
%i.f.i21 = bitcast i32 %i.i19 to float
%j.f.i22 = bitcast i32 %j.i20 to float
- %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 0, i32 7, i32 %arg4) #0
- %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 0, i32 7, i32 %arg4) #0
+ %p1.i23 = call float @llvm.amdgcn.interp.p1(float %i.f.i21, i32 0, i32 7, i32 %arg4) nounwind
+ %p2.i24 = call float @llvm.amdgcn.interp.p2(float %p1.i23, float %j.f.i22, i32 0, i32 7, i32 %arg4) nounwind
%i.i13 = extractelement <2 x i32> %arg6, i32 0
%j.i14 = extractelement <2 x i32> %arg6, i32 1
%i.f.i15 = bitcast i32 %i.i13 to float
%j.f.i16 = bitcast i32 %j.i14 to float
- %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 1, i32 7, i32 %arg4) #0
- %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 1, i32 7, i32 %arg4) #0
+ %p1.i17 = call float @llvm.amdgcn.interp.p1(float %i.f.i15, i32 1, i32 7, i32 %arg4) nounwind
+ %p2.i18 = call float @llvm.amdgcn.interp.p2(float %p1.i17, float %j.f.i16, i32 1, i32 7, i32 %arg4) nounwind
%i.i7 = extractelement <2 x i32> %arg6, i32 0
%j.i8 = extractelement <2 x i32> %arg6, i32 1
%i.f.i9 = bitcast i32 %i.i7 to float
%j.f.i10 = bitcast i32 %j.i8 to float
- %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 2, i32 7, i32 %arg4) #0
- %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 2, i32 7, i32 %arg4) #0
+ %p1.i11 = call float @llvm.amdgcn.interp.p1(float %i.f.i9, i32 2, i32 7, i32 %arg4) nounwind
+ %p2.i12 = call float @llvm.amdgcn.interp.p2(float %p1.i11, float %j.f.i10, i32 2, i32 7, i32 %arg4) nounwind
%i.i1 = extractelement <2 x i32> %arg6, i32 0
%j.i2 = extractelement <2 x i32> %arg6, i32 1
%i.f.i3 = bitcast i32 %i.i1 to float
%j.f.i4 = bitcast i32 %j.i2 to float
- %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 3, i32 7, i32 %arg4) #0
- %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 3, i32 7, i32 %arg4) #0
+ %p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 3, i32 7, i32 %arg4) nounwind
+ %p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 3, i32 7, i32 %arg4) nounwind
%tmp195 = fmul float %arg14, %tmp123
%tmp196 = fadd float %tmp195, %tmp124
%max.0.i = call float @llvm.maxnum.f32(float %tmp162, float 0.000000e+00)
@@ -1648,7 +1648,7 @@ ENDIF209: ; preds = %ELSE214, %ELSE211,
%clamp.i2 = call float @llvm.minnum.f32(float %max.0.i1, float 1.000000e+00)
%tmp776 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp768, float %tmp770)
%tmp778 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp772, float %clamp.i2)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp776, <2 x half> %tmp778, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp776, <2 x half> %tmp778, i1 true, i1 true) nounwind
ret void
ELSE214: ; preds = %ELSE211
@@ -1664,32 +1664,28 @@ ELSE214: ; preds = %ELSE211
br label %ENDIF209
}
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.ceil.f32(float) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.pow.f32(float, float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.amdgcn.cubeid(float, float, float) #1
-declare float @llvm.amdgcn.cubesc(float, float, float) #1
-declare float @llvm.amdgcn.cubetc(float, float, float) #1
-declare float @llvm.amdgcn.cubema(float, float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare float @llvm.exp2.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.pow.f32(float, float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
+declare float @llvm.amdgcn.cubeid(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubesc(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubetc(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.cubema(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
index 7290b47658b3d5..ef6198f5c98592 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
@@ -7,7 +7,7 @@
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
-define amdgpu_ps void @main() #0 {
+define amdgpu_ps void @main() nounwind {
main_body:
%tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 16, i32 0)
%tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 32, i32 0)
@@ -90,7 +90,7 @@ ENDLOOP: ; preds = %ELSE2566, %LOOP
%one.sub.ac.i = fmul float %one.sub.a.i, undef
%fmul = fmul float undef, undef
%result.i = fadd float %fmul, %one.sub.ac.i
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float undef, float %result.i, float undef, float 1.000000e+00, i1 true, i1 true) nounwind
ret void
ENDIF: ; preds = %LOOP
@@ -492,13 +492,10 @@ ELSE2824: ; preds = %ELSE2821
br label %ENDIF2795
}
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.sqrt.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 8fec79a7efd9b9..6f9b9210ff4651 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -11,7 +11,7 @@
; GCN: flat_load_dwordx4
; GCN: ds_write_b128 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:512
; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:129 offset1:130
-define amdgpu_kernel void @no_reorder_flat_load_local_store_local_load(ptr addrspace(3) %out, ptr %fptr) #0 {
+define amdgpu_kernel void @no_reorder_flat_load_local_store_local_load(ptr addrspace(3) %out, ptr %fptr) nounwind {
%ptr1 = getelementptr %struct.lds, ptr addrspace(3) @stored_lds_struct, i32 0, i32 1
%ptr2 = getelementptr %struct.lds, ptr addrspace(3) @stored_lds_struct, i32 0, i32 1, i32 4
call void @llvm.memcpy.p3.p0(ptr addrspace(3) align 16 %ptr1, ptr align 8 %fptr, i64 16, i1 false)
@@ -27,7 +27,7 @@ define amdgpu_kernel void @no_reorder_flat_load_local_store_local_load(ptr addrs
; GFX9: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3
; GFX9: global_store_dword
; GFX9: global_store_dword
-define amdgpu_kernel void @reorder_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 {
+define amdgpu_kernel void @reorder_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) nounwind {
%ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1
@@ -51,7 +51,7 @@ define amdgpu_kernel void @reorder_local_load_global_store_local_load(ptr addrsp
; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
; GFX9: global_store_dword
; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
-define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 {
+define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) nounwind {
%ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1
@@ -77,7 +77,7 @@ define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_loa
; GFX9: s_barrier
; GFX9-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; GFX9-DAG: global_store_dword
-define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 {
+define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) nounwind {
%ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1
@@ -85,7 +85,7 @@ define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load
%tmp1 = load i32, ptr addrspace(3) %ptr1, align 4
store i32 99, ptr addrspace(1) %gptr, align 4
- call void @llvm.amdgcn.s.barrier() #1
+ call void @llvm.amdgcn.s.barrier() convergent nounwind willreturn
%tmp2 = load i32, ptr addrspace(3) %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -108,7 +108,7 @@ define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load
; CI: buffer_store_dword
; GFX9: global_store_dword
-define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) #0 {
+define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(ptr addrspace(1) %out, ptr addrspace(1) %gptr) nounwind {
%ptr0 = load ptr addrspace(4), ptr addrspace(3) @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1
@@ -137,7 +137,7 @@ define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(ptr
; GCN-DAG: ds_write_b32
; CI: buffer_store_dword
; GFX9: global_store_dword
-define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr) #0 {
+define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr) nounwind {
%ptr0 = load ptr addrspace(4), ptr addrspace(3) @stored_constant_ptr, align 8
%ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1
@@ -160,7 +160,7 @@ define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(ptr a
; GCN: ds_write_b32
; CI: buffer_store_dword
; GFX9: global_store_dword
-define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(ptr addrspace(1) %out, ptr addrspace(3) noalias %lptr, ptr addrspace(4) %ptr0) #0 {
+define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(ptr addrspace(1) %out, ptr addrspace(3) noalias %lptr, ptr addrspace(4) %ptr0) nounwind {
%ptr1 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, ptr addrspace(4) %ptr0, i64 2
@@ -183,7 +183,7 @@ define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(ptr addrspace
; GFX9: global_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
; GFX9: global_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
; GFX9: ds_write_b32
-define amdgpu_kernel void @reorder_global_load_local_store_global_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr, ptr addrspace(1) %ptr0) #0 {
+define amdgpu_kernel void @reorder_global_load_local_store_global_load(ptr addrspace(1) %out, ptr addrspace(3) %lptr, ptr addrspace(1) %ptr0) nounwind {
%ptr1 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i64 1
%ptr2 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i64 3
@@ -204,7 +204,7 @@ define amdgpu_kernel void @reorder_global_load_local_store_global_load(ptr addrs
; CI: buffer_store_dword
; GFX9: global_store_dword
; GCN: s_endpgm
-define amdgpu_kernel void @reorder_local_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(3) noalias nocapture %ptr0) #0 {
+define amdgpu_kernel void @reorder_local_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(3) noalias nocapture %ptr0) nounwind {
%ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 3
%ptr2 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 100
%ptr3 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 102
@@ -238,7 +238,7 @@ define amdgpu_kernel void @reorder_local_offsets(ptr addrspace(1) nocapture %out
; GFX9-DAG: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:408
; GFX9: global_store_dword
; GFX9: s_endpgm
-define amdgpu_kernel void @reorder_global_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(1) noalias nocapture %ptr0) #0 {
+define amdgpu_kernel void @reorder_global_offsets(ptr addrspace(1) nocapture %out, ptr addrspace(1) noalias nocapture readnone %gptr, ptr addrspace(1) noalias nocapture %ptr0) nounwind {
%ptr1 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 3
%ptr2 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 100
%ptr3 = getelementptr inbounds i32, ptr addrspace(1) %ptr0, i32 102
@@ -281,7 +281,7 @@ define amdgpu_kernel void @reorder_global_offsets(ptr addrspace(1) nocapture %ou
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52
-define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(ptr addrspace(1) noalias nocapture %ptr.base) #0 {
+define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(ptr addrspace(1) noalias nocapture %ptr.base) nounwind {
%id = call i32 @llvm.amdgcn.workitem.id.x()
%id.ext = sext i32 %id to i64
@@ -308,7 +308,7 @@ define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(ptr addrspace(
; GCN-LABEL: {{^}}reorder_local_load_tbuffer_store_local_load:
; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:2
; GCN: tbuffer_store_format
-define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace(1) %out, i32 %a1, i32 %vaddr) #0 {
+define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace(1) %out, i32 %a1, i32 %vaddr) nounwind {
%ptr0 = load ptr addrspace(3), ptr addrspace(3) @stored_lds_ptr, align 4
%ptr1 = getelementptr inbounds i32, ptr addrspace(3) %ptr0, i32 1
@@ -328,11 +328,6 @@ define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace
}
declare void @llvm.memcpy.p3.p0(ptr addrspace(3), ptr, i64, i1)
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #3
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind willreturn }
-attributes #2 = { nounwind readnone speculatable willreturn }
-attributes #3 = { nounwind willreturn writeonly }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind willreturn
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
+declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) nounwind willreturn writeonly
diff --git a/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll b/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll
index 99efc0eb60d171..20ef2f9fb49637 100644
--- a/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-vector-hang.ll
@@ -12,7 +12,7 @@
; CHECK: buffer_store_byte
; ModuleID = 'radeon'
-define amdgpu_kernel void @test_8_min_char(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture readonly %in0, ptr addrspace(1) nocapture readonly %in1) #0 {
+define amdgpu_kernel void @test_8_min_char(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture readonly %in0, ptr addrspace(1) nocapture readonly %in1) nounwind {
entry:
%0 = load i8, ptr addrspace(1) %in0, align 1
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
@@ -90,8 +90,6 @@ entry:
ret void
}
-attributes #0 = { nounwind }
-
!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!0 = !{null}
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 2c0f64f85d823a..c65a68ebb0b3e6 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -9,7 +9,7 @@ target datalayout = "A5"
; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64
-define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
+define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) nounwind noinline {
%add0 = add i32 %arg0, %arg1
ret i32 %add0
}
@@ -23,7 +23,7 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64
; GCN: ; ScratchSize: 68
-define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
+define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) nounwind noinline {
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
store volatile i32 9, ptr addrspace(5) %gep
@@ -32,7 +32,7 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
}
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32:
-define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
+define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
ret i32 %ret
@@ -43,7 +43,7 @@ entry:
; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:20
; GCN: s_setpc_b64
; GCN: ; ScratchSize: 68
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
@@ -57,7 +57,7 @@ entry:
; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:20
; GCN: s_setpc_b64
; GCN: ; ScratchSize: 136
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
@@ -67,7 +67,7 @@ entry:
}
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_unused_result:
-define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
ret void
@@ -75,8 +75,8 @@ entry:
; It doesn't make sense to do a tail from a kernel
; GCN-LABEL: {{^}}kernel_call_i32_fastcc_i32_i32_unused_result:
-;define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
-define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
+;define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) nounwind noinline {
+define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
ret void
@@ -91,7 +91,7 @@ entry:
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
-define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) #1 {
+define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) nounwind noinline {
%arg1.load = load i32, ptr addrspace(5) %arg1, align 4
%add0 = add i32 %arg0, %arg1.load
ret i32 %add0
@@ -104,7 +104,7 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) b
; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
; GCN: s_setpc_b64
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) %b.byval)
ret i32 %ret
@@ -120,7 +120,7 @@ entry:
; GCN: buffer_load_dword v1, off, s[0:3], 0 offset:16
; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
; GCN-NEXT: s_setpc_b64
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) inttoptr (i32 16 to ptr addrspace(5)))
ret i32 %ret
@@ -140,7 +140,7 @@ entry:
; GFX9: v_add3_u32 v0, v0, v3, v2
; GCN-NEXT: s_setpc_b64
-define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
+define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) nounwind noinline {
%val_firststack = extractvalue [32 x i32] %large, 30
%val_laststack = extractvalue [32 x i32] %large, 31
%add0 = add i32 %arg0, %arg1
@@ -164,7 +164,7 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
; GCN-NOT: s32
; GCN: s_setpc_b64
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
ret i32 %ret
@@ -174,7 +174,7 @@ entry:
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:32
; GCN: s_setpc_b64
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
@@ -190,7 +190,7 @@ entry:
; GCN-LABEL: {{^}}no_sibling_call_callee_more_stack_space:
; GCN: s_swappc_b64
; GCN: s_setpc_b64
-define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
+define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) nounwind noinline {
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
ret i32 %ret
@@ -233,7 +233,7 @@ entry:
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN-NEXT: s_setpc_b64 s[4:5]
-define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
+define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
%ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call)
@@ -251,7 +251,7 @@ entry:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:
; GCN: s_setpc_b64 s[4:5]
-define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
+define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) nounwind noinline {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
@@ -266,7 +266,7 @@ entry:
; GCN-NOT: s33
; GCN: s_setpc_b64 s[4:5]
-define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
+define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) nounwind noinline {
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5
@@ -282,7 +282,7 @@ entry:
; GCN: s_load_dwordx2 [[GV_ADDR:s\[[0-9]+:[0-9]+\]]]
; GCN: s_load_dwordx2 [[FUNC_PTR:s\[[0-9]+:[0-9]+\]]], [[GV_ADDR]]
; GCN: s_setpc_b64 [[FUNC_PTR]]
-define hidden fastcc i32 @indirect_uniform_sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
+define hidden fastcc i32 @indirect_uniform_sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%func.ptr.load = load ptr, ptr addrspace(4) @func_ptr_gv
%ret = tail call fastcc i32 %func.ptr.load(i32 %a, i32 %b)
@@ -298,7 +298,7 @@ entry:
; GCN: s_swappc_b64
; GCN: s_cbranch_execnz
; GCN: s_setpc_b64
-define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr %func.ptr, i32 %a, i32 %b, i32 %c) #1 {
+define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr %func.ptr, i32 %a, i32 %b, i32 %c) nounwind noinline {
entry:
%add = add i32 %b, %c
%ret = tail call fastcc i32 %func.ptr(i32 %a, i32 %add)
@@ -330,7 +330,7 @@ declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x
; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s32 offset:28{{$}}
; GCN: s_setpc_b64 [[TARGET_ADDR]]
-define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
+define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) nounwind noinline {
entry:
%alloca0 = alloca [3 x i32], align 16, addrspace(5)
%alloca1 = alloca [2 x i64], align 8, addrspace(5)
@@ -362,7 +362,7 @@ declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64 [[TARGET_ADDR]]
-define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
+define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) nounwind noinline {
entry:
%alloca = alloca [3 x i32], align 16, addrspace(5)
store [3 x i32] [i32 9, i32 9, i32 9], ptr addrspace(5) %alloca
@@ -378,7 +378,7 @@ declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
+define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) nounwind noinline {
entry:
%ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a)
ret i64 %ret
@@ -392,7 +392,7 @@ declare hidden fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) #1 {
+define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) nounwind noinline {
entry:
%ret = tail call fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %a)
ret ptr addrspace(1) %ret
@@ -406,7 +406,7 @@ declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
+define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) nounwind noinline {
entry:
%ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a)
ret i16 %ret
@@ -420,7 +420,7 @@ declare hidden fastcc half @f16_fastcc_f16(half %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
+define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) nounwind noinline {
entry:
%ret = tail call fastcc half @f16_fastcc_f16(half %a)
ret half %ret
@@ -434,7 +434,7 @@ declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 {
+define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) nounwind noinline {
entry:
%ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a)
ret <3 x i16> %ret
@@ -448,7 +448,7 @@ declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 {
+define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) nounwind noinline {
entry:
%ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a)
ret <4 x i16> %ret
@@ -462,11 +462,8 @@ declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0)
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
-define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 {
+define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) nounwind noinline {
entry:
%ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a)
ret <2 x i64> %ret
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
index 9a03d216c7a99d..f17a8837c61a55 100644
--- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll
+++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
@@ -330,7 +330,7 @@ define amdgpu_kernel void @v_sext_i1_to_i16_with_and(ptr addrspace(1) %out, i32
; VI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%cmp0 = icmp eq i32 %a, %tid
%cmp1 = icmp eq i32 %b, %c
%cmp = and i1 %cmp0, %cmp1
@@ -619,6 +619,4 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addr
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
index 731a88278e512c..365ab7af040867 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -764,9 +764,9 @@ declare float @_Z6sincosfPf(float, ptr)
define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
entry:
%tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
- %tmp2 = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
+ %tmp2 = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) nounwind
%tmp3 = call ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4)
- %tmp4 = call i32 @__read_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
+ %tmp4 = call i32 @__read_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) nounwind
call void @__commit_read_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4)
ret void
}
@@ -785,10 +785,10 @@ declare void @__commit_read_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32)
define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
entry:
%tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
- %tmp2 = call i32 @__write_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
- %tmp3 = call ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) #0
- %tmp4 = call i32 @__write_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
- call void @__commit_write_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) #0
+ %tmp2 = call i32 @__write_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) nounwind
+ %tmp3 = call ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) nounwind
+ %tmp4 = call i32 @__write_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) nounwind
+ call void @__commit_write_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) nounwind
ret void
}
@@ -812,26 +812,26 @@ declare void @__commit_write_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32)
; GCN-PRELINK: call i32 @__read_pipe_2_64(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(ptr addrspace(1) %{{.*}}, ptr %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
-define amdgpu_kernel void @test_pipe_size(ptr addrspace(1) %p1, ptr addrspace(1) %ptr1, ptr addrspace(1) %p2, ptr addrspace(1) %ptr2, ptr addrspace(1) %p4, ptr addrspace(1) %ptr4, ptr addrspace(1) %p8, ptr addrspace(1) %ptr8, ptr addrspace(1) %p16, ptr addrspace(1) %ptr16, ptr addrspace(1) %p32, ptr addrspace(1) %ptr32, ptr addrspace(1) %p64, ptr addrspace(1) %ptr64, ptr addrspace(1) %p128, ptr addrspace(1) %ptr128, ptr addrspace(1) %pu, ptr addrspace(1) %ptru) local_unnamed_addr #0 {
+define amdgpu_kernel void @test_pipe_size(ptr addrspace(1) %p1, ptr addrspace(1) %ptr1, ptr addrspace(1) %p2, ptr addrspace(1) %ptr2, ptr addrspace(1) %p4, ptr addrspace(1) %ptr4, ptr addrspace(1) %p8, ptr addrspace(1) %ptr8, ptr addrspace(1) %p16, ptr addrspace(1) %ptr16, ptr addrspace(1) %p32, ptr addrspace(1) %ptr32, ptr addrspace(1) %p64, ptr addrspace(1) %ptr64, ptr addrspace(1) %p128, ptr addrspace(1) %ptr128, ptr addrspace(1) %pu, ptr addrspace(1) %ptru) local_unnamed_addr nounwind {
entry:
%tmp = addrspacecast ptr addrspace(1) %ptr1 to ptr
- %tmp1 = call i32 @__read_pipe_2(ptr addrspace(1) %p1, ptr %tmp, i32 1, i32 1) #0
+ %tmp1 = call i32 @__read_pipe_2(ptr addrspace(1) %p1, ptr %tmp, i32 1, i32 1) nounwind
%tmp3 = addrspacecast ptr addrspace(1) %ptr2 to ptr
- %tmp4 = call i32 @__read_pipe_2(ptr addrspace(1) %p2, ptr %tmp3, i32 2, i32 2) #0
+ %tmp4 = call i32 @__read_pipe_2(ptr addrspace(1) %p2, ptr %tmp3, i32 2, i32 2) nounwind
%tmp6 = addrspacecast ptr addrspace(1) %ptr4 to ptr
- %tmp7 = call i32 @__read_pipe_2(ptr addrspace(1) %p4, ptr %tmp6, i32 4, i32 4) #0
+ %tmp7 = call i32 @__read_pipe_2(ptr addrspace(1) %p4, ptr %tmp6, i32 4, i32 4) nounwind
%tmp9 = addrspacecast ptr addrspace(1) %ptr8 to ptr
- %tmp10 = call i32 @__read_pipe_2(ptr addrspace(1) %p8, ptr %tmp9, i32 8, i32 8) #0
+ %tmp10 = call i32 @__read_pipe_2(ptr addrspace(1) %p8, ptr %tmp9, i32 8, i32 8) nounwind
%tmp12 = addrspacecast ptr addrspace(1) %ptr16 to ptr
- %tmp13 = call i32 @__read_pipe_2(ptr addrspace(1) %p16, ptr %tmp12, i32 16, i32 16) #0
+ %tmp13 = call i32 @__read_pipe_2(ptr addrspace(1) %p16, ptr %tmp12, i32 16, i32 16) nounwind
%tmp15 = addrspacecast ptr addrspace(1) %ptr32 to ptr
- %tmp16 = call i32 @__read_pipe_2(ptr addrspace(1) %p32, ptr %tmp15, i32 32, i32 32) #0
+ %tmp16 = call i32 @__read_pipe_2(ptr addrspace(1) %p32, ptr %tmp15, i32 32, i32 32) nounwind
%tmp18 = addrspacecast ptr addrspace(1) %ptr64 to ptr
- %tmp19 = call i32 @__read_pipe_2(ptr addrspace(1) %p64, ptr %tmp18, i32 64, i32 64) #0
+ %tmp19 = call i32 @__read_pipe_2(ptr addrspace(1) %p64, ptr %tmp18, i32 64, i32 64) nounwind
%tmp21 = addrspacecast ptr addrspace(1) %ptr128 to ptr
- %tmp22 = call i32 @__read_pipe_2(ptr addrspace(1) %p128, ptr %tmp21, i32 128, i32 128) #0
+ %tmp22 = call i32 @__read_pipe_2(ptr addrspace(1) %p128, ptr %tmp21, i32 128, i32 128) nounwind
%tmp24 = addrspacecast ptr addrspace(1) %ptru to ptr
- %tmp25 = call i32 @__read_pipe_2(ptr addrspace(1) %pu, ptr %tmp24, i32 400, i32 4) #0
+ %tmp25 = call i32 @__read_pipe_2(ptr addrspace(1) %pu, ptr %tmp24, i32 400, i32 4) nounwind
ret void
}
@@ -839,4 +839,3 @@ entry:
; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind }
; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) }
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
index fec2fcba83f4a4..b9726e548b1f7a 100644
--- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -10,13 +10,13 @@
@0 = external unnamed_addr addrspace(3) global [462 x float], align 4
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare float @llvm.fmuladd.f32(float, float, float) #0
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone speculatable
; CHECK: s_endpgm
define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
@@ -89,6 +89,4 @@ bb38: ; preds = %bb37, %bb11
ret void
}
-attributes #0 = { nounwind readnone speculatable }
-
!0 = !{i32 8, i32 16, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..2066e58f038c1a 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -15,12 +15,12 @@ main_body:
br i1 undef, label %endif1, label %if1
if1: ; preds = %main_body
- call void @llvm.amdgcn.kill(i1 false) #4
+ call void @llvm.amdgcn.kill(i1 false) nounwind
br label %exit
endif1: ; preds = %main_body
%i22 = extractelement <3 x float> %i, i32 2
- %i23 = call nsz arcp contract float @llvm.fma.f32(float %i22, float 0.000000e+00, float 0.000000e+00) #1
+ %i23 = call nsz arcp contract float @llvm.fma.f32(float %i22, float 0.000000e+00, float 0.000000e+00) nounwind readnone
br label %exit
exit: ; preds = %endif1, %if1
@@ -28,15 +28,10 @@ exit: ; preds = %endif1, %if1
ret float %i24
}
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fma.f32(float, float, float) #2
+declare float @llvm.fma.f32(float, float, float) nofree nosync nounwind readnone speculatable willreturn
; Function Attrs: nounwind
-declare void @llvm.amdgcn.kill(i1) #4
-
-attributes #1 = { nounwind readnone }
-attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
-attributes #3 = { nounwind readonly willreturn }
-attributes #4 = { nounwind }
+declare void @llvm.amdgcn.kill(i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index b03726817c1b48..63fc068442457f 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -5,7 +5,7 @@
; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
-define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) nounwind {
; GFX6-LABEL: s_sint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -84,7 +84,7 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %i
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_sint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -183,7 +183,7 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) nounwind {
; GFX6-LABEL: s_sint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -258,7 +258,7 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %i
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_sint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -354,7 +354,7 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) nounwind {
; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -464,7 +464,7 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -677,7 +677,7 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) nounwind {
; GFX6-LABEL: s_sint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -799,7 +799,7 @@ define amdgpu_kernel void @s_sint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_sint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1035,7 +1035,4 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, pt
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
index d4b0dfda5afda3..4de336de42553d 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
@@ -6,7 +6,7 @@
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) nounwind {
%result = sitofp i32 %in to float
store float %result, ptr addrspace(1) %out
ret void
@@ -16,7 +16,7 @@ define amdgpu_kernel void @s_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %i
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
; R600: INT_TO_FLT
-define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -32,7 +32,7 @@ define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr ad
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %in) #0{
+define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %in) nounwind {
%result = sitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, ptr addrspace(1) %out
ret void
@@ -49,7 +49,7 @@ define amdgpu_kernel void @s_sint_to_fp_v2i32(ptr addrspace(1) %out, <2 x i32> %
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%value = load <4 x i32>, ptr addrspace(1) %in
%result = sitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, ptr addrspace(1) %out
@@ -66,7 +66,7 @@ define amdgpu_kernel void @s_sint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, pt
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
@@ -82,7 +82,7 @@ define amdgpu_kernel void @v_sint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspa
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) nounwind {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
store float %fp, ptr addrspace(1) %out
@@ -93,7 +93,7 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32(ptr addrspace(1) %out, i32 %in) #
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %in) #0 {
+define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %in) nounwind {
%fp = sitofp i1 %in to float
store float %fp, ptr addrspace(1) %out
ret void
@@ -106,7 +106,7 @@ define amdgpu_kernel void @s_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, i1 %i
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -116,7 +116,4 @@ define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr a
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
index 6f768641b5b03e..a6ed29bfac05e0 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
@@ -48,10 +48,6 @@ end:
ret void
}
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind noreturn }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone speculatable }
+declare void @llvm.trap() nounwind noreturn
+declare void @llvm.debugtrap() nounwind
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 138dd53b3ede41..172a0b5232c63e 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
+define amdgpu_ps void @test_kill_depth_0_imm_pos() nounwind {
; GCN-LABEL: test_kill_depth_0_imm_pos:
; GCN: ; %bb.0:
; GCN-NEXT: s_endpgm
@@ -12,7 +12,7 @@ define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
ret void
}
-define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
+define amdgpu_ps void @test_kill_depth_0_imm_neg() nounwind {
; WAVE64-LABEL: test_kill_depth_0_imm_neg:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
@@ -47,7 +47,7 @@ define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
}
; FIXME: Ideally only one early-exit would be emitted
-define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
+define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() nounwind {
; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: s_mov_b64 s[0:1], exec
@@ -99,7 +99,7 @@ define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
ret void
}
-define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
+define amdgpu_ps void @test_kill_depth_var(float %x) nounwind {
; WAVE64-LABEL: test_kill_depth_var:
; WAVE64: ; %bb.0:
; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
@@ -138,7 +138,7 @@ define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
}
; FIXME: Ideally only one early-exit would be emitted
-define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
+define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) nounwind {
; SI-LABEL: test_kill_depth_var_x2_same:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
@@ -214,7 +214,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
}
; FIXME: Ideally only one early-exit would be emitted
-define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
+define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) nounwind {
; SI-LABEL: test_kill_depth_var_x2:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
@@ -290,7 +290,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
ret void
}
-define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
+define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) nounwind {
; SI-LABEL: test_kill_depth_var_x2_instructions:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b64 s[0:1], exec
@@ -380,7 +380,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
}
; FIXME: why does the skip depend on the asm length in the same block?
-define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
+define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) nounwind {
; SI-LABEL: test_kill_control_flow:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
@@ -546,7 +546,7 @@ exit:
ret float 1.0
}
-define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
+define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) nounwind {
; SI-LABEL: test_kill_control_flow_remainder:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_lg_u32 s0, 0
@@ -759,7 +759,7 @@ exit:
ret void
}
-define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
+define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) nounwind {
; SI-LABEL: test_kill_control_flow_return:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_cmp_eq_u32 s0, 1
@@ -933,7 +933,7 @@ exit:
ret float %ret
}
-define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
+define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) nounwind {
; SI-LABEL: test_kill_divergent_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b64 s[0:1], exec
@@ -1140,7 +1140,7 @@ exit:
}
; bug 28550
-define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
+define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) nounwind {
; SI-LABEL: phi_use_def_before_kill:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
@@ -1292,7 +1292,7 @@ end:
ret void
}
-define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
+define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) nounwind {
; SI-LABEL: no_skip_no_successors:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
@@ -1398,7 +1398,7 @@ bb7: ; preds = %bb4
ret void
}
-define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
+define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) nounwind {
; SI-LABEL: if_after_kill_block:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_mov_b64 s[0:1], exec
@@ -1701,7 +1701,7 @@ live:
export:
%proxy = phi float [ undef, %kill ], [ %scale, %live ]
- call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
+ call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) inaccessiblememonly nounwind writeonly
ret void
}
@@ -1963,14 +1963,9 @@ bb.1:
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
-declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare void @llvm.amdgcn.kill(i1) #0
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) inaccessiblememonly nounwind writeonly
+declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare void @llvm.amdgcn.kill(i1) nounwind
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { inaccessiblememonly nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/skip-promote-alloca-vector-users.ll b/llvm/test/CodeGen/AMDGPU/skip-promote-alloca-vector-users.ll
index e14ae06b80316c..d25c027a012ea1 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-promote-alloca-vector-users.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-promote-alloca-vector-users.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: @test_insertelement(
; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertelement <2 x ptr addrspace(5)> undef, ptr addrspace(5) %alloca, i32 0
-define amdgpu_kernel void @test_insertelement() #0 {
+define amdgpu_kernel void @test_insertelement() nounwind {
entry:
%alloca = alloca i16, align 4, addrspace(5)
%in = insertelement <2 x ptr addrspace(5)> undef, ptr addrspace(5) %alloca, i32 0
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: @test_insertvalue(
; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertvalue { ptr addrspace(5) } undef, ptr addrspace(5) %alloca, 0
-define amdgpu_kernel void @test_insertvalue() #0 {
+define amdgpu_kernel void @test_insertvalue() nounwind {
entry:
%alloca = alloca i16, align 4, addrspace(5)
%in = insertvalue { ptr addrspace(5) } undef, ptr addrspace(5) %alloca, 0
@@ -27,12 +27,10 @@ entry:
; CHECK-LABEL: @test_insertvalue_array(
; CHECK: %alloca = alloca i16
; CHECK-NEXT: insertvalue [2 x ptr addrspace(5)] undef, ptr addrspace(5) %alloca, 0
-define amdgpu_kernel void @test_insertvalue_array() #0 {
+define amdgpu_kernel void @test_insertvalue_array() nounwind {
entry:
%alloca = alloca i16, align 4, addrspace(5)
%in = insertvalue [2 x ptr addrspace(5)] undef, ptr addrspace(5) %alloca, 0
store [2 x ptr addrspace(5)] %in, ptr undef, align 4
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll
index e0d0ddce208c46..c89b8ab10394d4 100644
--- a/llvm/test/CodeGen/AMDGPU/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/smed3.ll
@@ -2,11 +2,11 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -25,7 +25,7 @@ define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr add
; GCN-LABEL: {{^}}v_test_smed3_multi_use_r_i_i_i32:
; GCN: v_max_i32
; GCN: v_min_i32
-define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -45,7 +45,7 @@ define amdgpu_kernel void @v_test_smed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -64,7 +64,7 @@ define amdgpu_kernel void @v_test_smed3_r_i_i_sign_mismatch_i32(ptr addrspace(1)
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i64:
; GCN: v_cmp_lt_i64
; GCN: v_cmp_gt_i64
-define amdgpu_kernel void @v_test_smed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
@@ -98,7 +98,7 @@ declare i64 @llvm.smin.i64(i64, i64)
; VI: v_max_i16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
; VI: v_min_i16_e32 {{v[0-9]}}, 17, [[MAX]]
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i16, ptr addrspace(1) %out, i32 %tid
@@ -115,37 +115,37 @@ define amdgpu_kernel void @v_test_smed3_r_i_i_i16(ptr addrspace(1) %out, ptr add
}
-define internal i32 @smin(i32 %x, i32 %y) #2 {
+define internal i32 @smin(i32 %x, i32 %y) nounwind readnone alwaysinline {
%cmp = icmp slt i32 %x, %y
%sel = select i1 %cmp, i32 %x, i32 %y
ret i32 %sel
}
-define internal i32 @smax(i32 %x, i32 %y) #2 {
+define internal i32 @smax(i32 %x, i32 %y) nounwind readnone alwaysinline {
%cmp = icmp sgt i32 %x, %y
%sel = select i1 %cmp, i32 %x, i32 %y
ret i32 %sel
}
-define internal i16 @smin16(i16 %x, i16 %y) #2 {
+define internal i16 @smin16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%cmp = icmp slt i16 %x, %y
%sel = select i1 %cmp, i16 %x, i16 %y
ret i16 %sel
}
-define internal i16 @smax16(i16 %x, i16 %y) #2 {
+define internal i16 @smax16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%cmp = icmp sgt i16 %x, %y
%sel = select i1 %cmp, i16 %x, i16 %y
ret i16 %sel
}
-define internal i8 @smin8(i8 %x, i8 %y) #2 {
+define internal i8 @smin8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%cmp = icmp slt i8 %x, %y
%sel = select i1 %cmp, i8 %x, i8 %y
ret i8 %sel
}
-define internal i8 @smax8(i8 %x, i8 %y) #2 {
+define internal i8 @smax8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%cmp = icmp sgt i8 %x, %y
%sel = select i1 %cmp, i8 %x, i8 %y
ret i8 %sel
@@ -170,7 +170,7 @@ define internal i8 @smax8(i8 %x, i8 %y) #2 {
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -182,7 +182,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_1:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -194,7 +194,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_2:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -206,7 +206,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_3:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -218,7 +218,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_4:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -230,7 +230,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_5:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -242,7 +242,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_6:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -254,7 +254,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_7:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -266,7 +266,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_8:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -278,7 +278,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_9:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -290,7 +290,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_10:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -302,7 +302,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_11:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -314,7 +314,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_12:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -326,7 +326,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_13:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -338,7 +338,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_14:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -350,7 +350,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_15:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -375,7 +375,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_16:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -387,7 +387,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_17:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -399,7 +399,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_18:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -411,7 +411,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_19:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -423,7 +423,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_20:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -435,7 +435,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_21:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -447,7 +447,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_22:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -459,7 +459,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_23:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -471,7 +471,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_24:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -483,7 +483,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_25:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -495,7 +495,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_26:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -507,7 +507,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_27:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -519,7 +519,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_28:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -531,7 +531,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_29:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -543,7 +543,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_30:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -555,7 +555,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_31:
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %y, i32 %x)
%tmp1 = call i32 @smax(i32 %y, i32 %x)
@@ -571,7 +571,7 @@ bb:
; GCN: s_sext_i32_i16
; GCN: s_sext_i32_i16
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) nounwind {
bb:
%tmp0 = call i16 @smin16(i16 %x, i16 %y)
%tmp1 = call i16 @smax16(i16 %x, i16 %y)
@@ -586,7 +586,7 @@ bb:
; GCN: s_sext_i32_i8
; GCN: s_sext_i32_i8
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) nounwind {
bb:
%tmp0 = call i8 @smin8(i8 %x, i8 %y)
%tmp1 = call i8 @smax8(i8 %x, i8 %y)
@@ -600,7 +600,7 @@ bb:
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
-define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -615,7 +615,7 @@ bb:
; GCN: s_max_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
-define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -631,7 +631,7 @@ bb:
; GCN: s_min_i32
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32
-define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -645,7 +645,7 @@ bb:
; GCN-LABEL: {{^}}s_test_smed3_i32_pat_0_multi_use_result:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_smed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @smin(i32 %x, i32 %y)
%tmp1 = call i32 @smax(i32 %x, i32 %y)
@@ -660,7 +660,7 @@ bb:
; GCN-NOT: {{s_min_i32|s_max_i32}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
; GCN: v_med3_i32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
+define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) nounwind {
bb:
%lo = call i32 @smin(i32 %b0, i32 %b1)
%hi = call i32 @smax(i32 %b0, i32 %b1)
@@ -686,7 +686,7 @@ bb:
; VI: v_max_i16
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
+define amdgpu_kernel void @v_test_smed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %tid
@@ -708,7 +708,7 @@ bb:
; GCN-LABEL: {{^}}v_test_smed3_i16_pat_1:
; GFX9: v_med3_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
+define amdgpu_kernel void @v_test_smed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %tid
@@ -726,7 +726,3 @@ bb:
store i16 %tmp3, ptr addrspace(1) %out.gep
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone alwaysinline }
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index 65ae200275a0e8..ab580f02dc81fc 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -268,7 +268,4 @@ define amdgpu_kernel void @v_min_max_i32_user(ptr addrspace(1) %out0, ptr addrsp
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index d7a6be51106917..4a1e65712039ac 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -17,7 +17,7 @@
; CIVI-DAG: s_add_i32
; CIVI-DAG: s_and_b32
; CIVI-DAG: s_or_b32
-define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) nounwind {
%neg = sub <2 x i16> zeroinitializer, %val
%cond = icmp sgt <2 x i16> %val, %neg
%res = select <2 x i1> %cond, <2 x i16> %val, <2 x i16> %neg
@@ -53,7 +53,7 @@ define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0
; CI-DAG: v_add_i32
; CI-DAG: v_add_i32
; CI-DAG: v_or_b32
-define amdgpu_kernel void @v_abs_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @v_abs_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.in = getelementptr inbounds <2 x i16>, ptr addrspace(1) %src, i32 %tid
%gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i32 %tid
@@ -71,7 +71,7 @@ define amdgpu_kernel void @v_abs_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
-define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val) nounwind {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
%t0 = insertelement <2 x i16> undef, i16 2, i16 0
@@ -89,7 +89,7 @@ define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val)
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
-define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
%t0 = insertelement <2 x i16> undef, i16 2, i16 0
@@ -113,7 +113,7 @@ define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], s[[#LOAD + 3]], [[SUB1]]
; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
-define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0 {
+define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) nounwind {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
%z2 = insertelement <4 x i16> %z1, i16 0, i16 2
@@ -140,7 +140,7 @@ define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
-define amdgpu_kernel void @v_abs_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @v_abs_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %src) nounwind {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
%z2 = insertelement <4 x i16> %z1, i16 0, i16 2
@@ -163,7 +163,7 @@ define amdgpu_kernel void @v_abs_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN-LABEL: {{^}}s_min_max_v2i16:
; GFX9: v_pk_max_i16
; GFX9: v_pk_min_i16
-define amdgpu_kernel void @s_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %val0, <2 x i16> %val1) #0 {
+define amdgpu_kernel void @s_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %val0, <2 x i16> %val1) nounwind {
%cond0 = icmp sgt <2 x i16> %val0, %val1
%sel0 = select <2 x i1> %cond0, <2 x i16> %val0, <2 x i16> %val1
%sel1 = select <2 x i1> %cond0, <2 x i16> %val1, <2 x i16> %val0
@@ -176,7 +176,7 @@ define amdgpu_kernel void @s_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
; GCN-LABEL: {{^}}v_min_max_v2i16:
; GFX9: v_pk_max_i16
; GFX9: v_pk_min_i16
-define amdgpu_kernel void @v_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 {
+define amdgpu_kernel void @v_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) nounwind {
%val0 = load volatile <2 x i16>, ptr addrspace(1) %ptr0
%val1 = load volatile <2 x i16>, ptr addrspace(1) %ptr1
@@ -194,7 +194,7 @@ define amdgpu_kernel void @v_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
; GFX9-DAG: v_pk_min_i16
; GFX9-DAG: v_pk_max_i16
; GFX9-DAG: v_pk_min_i16
-define amdgpu_kernel void @s_min_max_v4i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i16> %val0, <4 x i16> %val1) #0 {
+define amdgpu_kernel void @s_min_max_v4i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i16> %val0, <4 x i16> %val1) nounwind {
%cond0 = icmp sgt <4 x i16> %val0, %val1
%sel0 = select <4 x i1> %cond0, <4 x i16> %val0, <4 x i16> %val1
%sel1 = select <4 x i1> %cond0, <4 x i16> %val1, <4 x i16> %val0
@@ -205,7 +205,7 @@ define amdgpu_kernel void @s_min_max_v4i16(ptr addrspace(1) %out0, ptr addrspace
}
; GCN-LABEL: {{^}}v_min_max_v2i16_user:
-define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 {
+define amdgpu_kernel void @v_min_max_v2i16_user(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) nounwind {
%val0 = load volatile <2 x i16>, ptr addrspace(1) %ptr0
%val1 = load volatile <2 x i16>, ptr addrspace(1) %ptr1
@@ -232,7 +232,4 @@ define amdgpu_kernel void @u_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index 4ce9260b8d53de..07a9078c5cc168 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -8,7 +8,7 @@
; GCN-LABEL: {{^}}smrd0:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
-define amdgpu_kernel void @smrd0(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd0(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 1
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -20,7 +20,7 @@ entry:
; GCN-LABEL: {{^}}smrd1:
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
-define amdgpu_kernel void @smrd1(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd1(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 255
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -35,7 +35,7 @@ entry:
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
-define amdgpu_kernel void @smrd2(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd2(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 256
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -50,7 +50,7 @@ entry:
; SI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0x13 ; encoding: [0x13
; TODO: Add VI checks
; GCN: s_endpgm
-define amdgpu_kernel void @smrd3(ptr addrspace(1) %out, [8 x i32], ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd3(ptr addrspace(1) %out, [8 x i32], ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 4294967296
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -66,7 +66,7 @@ entry:
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
; GFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
-define amdgpu_kernel void @smrd4(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd4(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 262143
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -80,7 +80,7 @@ entry:
; SIVIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_kernel void @smrd5(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd5(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 262144
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -93,7 +93,7 @@ entry:
; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4
; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, -0x4
-define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -106,7 +106,7 @@ entry:
; GCN: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, 0xffe00000
; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
-define amdgpu_kernel void @smrd7(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
+define amdgpu_kernel void @smrd7(ptr addrspace(1) %out, ptr addrspace(4) %ptr) nounwind {
entry:
%tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -524288
%tmp1 = load i32, ptr addrspace(4) %tmp
@@ -121,7 +121,7 @@ entry:
; GCN-DAG: s_mov_b32 s0, 0
; SI-NEXT: nop 3
; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
-define amdgpu_ps float @smrd_hazard(<4 x i32> inreg %desc) #0 {
+define amdgpu_ps float @smrd_hazard(<4 x i32> inreg %desc) nounwind {
main_body:
%d0 = insertelement <4 x i32> undef, i32 0, i32 0
%d1 = insertelement <4 x i32> %d0, i32 1, i32 1
@@ -135,11 +135,11 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
-define amdgpu_ps void @smrd_load_const0(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @smrd_load_const0(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 16, i32 0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %tmp21, i1 true, i1 true) nounwind
ret void
}
@@ -150,14 +150,14 @@ main_body:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff glc ; encoding: [0xff
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0x3fc ;
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0x3fc glc ;
-define amdgpu_ps void @smrd_load_const1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @smrd_load_const1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1020, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1020, i32 1)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -172,14 +172,14 @@ main_body:
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0x400
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0x400
-define amdgpu_ps void @smrd_load_const2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @smrd_load_const2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1024, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1024, i32 0)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -192,14 +192,14 @@ main_body:
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0xffffc
; VIGFX9_10: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]+}}], 0xffffc
-define amdgpu_ps void @smrd_load_const3(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @smrd_load_const3(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1048572, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1048572, i32 0)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -211,14 +211,14 @@ main_body:
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_const4(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @smrd_load_const4(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 1048576, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 1048576, i32 0)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -226,7 +226,7 @@ main_body:
; GCN-LABEL: {{^}}s_buffer_load_dwordx2:
; VIGFX9_10: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -234,7 +234,7 @@ main_body:
%s.buffer.0.float = bitcast i32 %s.buffer.0 to float
%s.buffer.1 = extractelement <2 x i32> %s.buffer, i32 1
%s.buffer.1.float = bitcast i32 %s.buffer.1 to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.0.float, float %s.buffer.1.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.0.float, float %s.buffer.1.float, i1 true, i1 true) nounwind
ret void
}
@@ -242,7 +242,7 @@ main_body:
; GCN-LABEL: {{^}}s_buffer_load_dwordx4:
; VIGFX9_10: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx4(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx4(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -254,7 +254,7 @@ main_body:
%s.buffer.2.float = bitcast i32 %s.buffer.2 to float
%s.buffer.3 = extractelement <4 x i32> %s.buffer, i32 3
%s.buffer.3.float = bitcast i32 %s.buffer.3 to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) nounwind
ret void
}
@@ -262,7 +262,7 @@ main_body:
; GCN-LABEL: {{^}}s_buffer_load_dwordx8:
; VIGFX9_10: s_buffer_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx8(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx8(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -274,7 +274,7 @@ main_body:
%s.buffer.2.float = bitcast i32 %s.buffer.2 to float
%s.buffer.3 = extractelement <8 x i32> %s.buffer, i32 7
%s.buffer.3.float = bitcast i32 %s.buffer.3 to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) nounwind
ret void
}
@@ -282,7 +282,7 @@ main_body:
; GCN-LABEL: {{^}}s_buffer_load_dwordx8_v8f32:
; VIGFX9_10: s_buffer_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx8_v8f32(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx8_v8f32(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -290,7 +290,7 @@ main_body:
%s.buffer.1 = extractelement <8 x float> %s.buffer, i32 2
%s.buffer.2 = extractelement <8 x float> %s.buffer, i32 5
%s.buffer.3 = extractelement <8 x float> %s.buffer, i32 7
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0, float %s.buffer.1, float %s.buffer.2, float %s.buffer.3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0, float %s.buffer.1, float %s.buffer.2, float %s.buffer.3, i1 true, i1 true) nounwind
ret void
}
@@ -298,7 +298,7 @@ main_body:
; GCN-LABEL: {{^}}s_buffer_load_dwordx16:
; VIGFX9_10: s_buffer_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx16(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx16(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -310,14 +310,14 @@ main_body:
%s.buffer.2.float = bitcast i32 %s.buffer.2 to float
%s.buffer.3 = extractelement <16 x i32> %s.buffer, i32 15
%s.buffer.3.float = bitcast i32 %s.buffer.3 to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0.float, float %s.buffer.1.float, float %s.buffer.2.float, float %s.buffer.3.float, i1 true, i1 true) nounwind
ret void
}
; GCN-LABEL: {{^}}s_buffer_load_dwordx16_v16f32:
; VIGFX9_10: s_buffer_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x80
; SICI: s_buffer_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0x20
-define amdgpu_ps void @s_buffer_load_dwordx16_v16f32(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) #0 {
+define amdgpu_ps void @s_buffer_load_dwordx16_v16f32(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %tmp22, i32 128, i32 0)
@@ -325,13 +325,13 @@ main_body:
%s.buffer.1 = extractelement <16 x float> %s.buffer, i32 3
%s.buffer.2 = extractelement <16 x float> %s.buffer, i32 12
%s.buffer.3 = extractelement <16 x float> %s.buffer, i32 15
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0, float %s.buffer.1, float %s.buffer.2, float %s.buffer.3, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %s.buffer.0, float %s.buffer.1, float %s.buffer.2, float %s.buffer.3, i1 true, i1 true) nounwind
ret void
}
; GCN-LABEL: {{^}}smrd_sgpr_offset:
; GCN: s_buffer_load_dword s{{[0-9]}}, s[0:3], s4
-define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
+define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) nounwind {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
ret float %r
@@ -339,7 +339,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_offset:
; GCN: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
-define amdgpu_ps float @smrd_vgpr_offset(<4 x i32> inreg %desc, i32 %offset) #0 {
+define amdgpu_ps float @smrd_vgpr_offset(<4 x i32> inreg %desc, i32 %offset) nounwind {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 0)
ret float %r
@@ -348,7 +348,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4092 ;
-define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
+define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) nounwind {
main_body:
%off = add i32 %offset, 4092
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %off, i32 0)
@@ -360,7 +360,7 @@ main_body:
; SICI-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0
; SICI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
; VIGFX9_10-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 4 offen offset:4092 ;
-define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
+define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) nounwind {
main_body:
%off = add i32 %offset, 4096
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %off, i32 0)
@@ -374,7 +374,7 @@ main_body:
; GFX10-NEXT: s_clause
; VIGFX9_10-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x4
; VIGFX9_10-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1c
-define amdgpu_ps void @smrd_imm_merged(<4 x i32> inreg %desc) #0 {
+define amdgpu_ps void @smrd_imm_merged(<4 x i32> inreg %desc) nounwind {
main_body:
%r1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 4, i32 0)
%r2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 8, i32 0)
@@ -382,8 +382,8 @@ main_body:
%r4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 16, i32 0)
%r5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 28, i32 0)
%r6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 32, i32 0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) #0
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) nounwind
ret void
}
@@ -406,7 +406,7 @@ main_body:
;
; Merging is still thwarted on GFX9 due to s_set_gpr_idx
;
-define amdgpu_ps float @smrd_imm_merge_m0(<4 x i32> inreg %desc, i32 inreg %prim, float %u, float %v) #0 {
+define amdgpu_ps float @smrd_imm_merge_m0(<4 x i32> inreg %desc, i32 inreg %prim, float %u, float %v) nounwind {
main_body:
%idx1.f = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 0)
%idx1 = bitcast float %idx1.f to i32
@@ -445,7 +445,7 @@ main_body:
; GFX10-NEXT: s_clause
; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
-define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
+define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) nounwind {
main_body:
%a1 = add i32 %a, 4
%a2 = add i32 %a, 8
@@ -459,14 +459,14 @@ main_body:
%r4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a4, i32 0)
%r5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a5, i32 0)
%r6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %a6, i32 0)
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) #0
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) nounwind
ret void
}
; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
; GCN: v_readfirstlane
-define amdgpu_cs void @smrd_sgpr_descriptor_promoted(ptr addrspace(4) inreg noalias dereferenceable(18446744073709551615), i32) #0 {
+define amdgpu_cs void @smrd_sgpr_descriptor_promoted(ptr addrspace(4) inreg noalias dereferenceable(18446744073709551615), i32) nounwind {
main_body:
br label %.outer_loop_header
@@ -503,14 +503,14 @@ ret_block: ; preds = %.outer, %.label22, %
; CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_nonconst0(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 inreg %ncoff) #0 {
+define amdgpu_ps void @smrd_load_nonconst0(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 inreg %ncoff) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -521,14 +521,14 @@ main_body:
; CI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; CI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_nonconst1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 %ncoff) #0 {
+define amdgpu_ps void @smrd_load_nonconst1(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 %ncoff) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
%s.buffer.float = bitcast i32 %s.buffer to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -539,7 +539,7 @@ main_body:
; CI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; CI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_nonconst2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 %ncoff) #0 {
+define amdgpu_ps void @smrd_load_nonconst2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 %ncoff) nounwind {
main_body:
%tmp20 = load <4 x i32>, ptr addrspace(4) %arg
%tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp20, i32 %ncoff, i32 0)
@@ -547,7 +547,7 @@ main_body:
%s.buffer = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
%s.buffer.elt = extractelement <8 x i32> %s.buffer, i32 1
%s.buffer.float = bitcast i32 %s.buffer.elt to float
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp21, float %tmp21, float %tmp21, float %s.buffer.float, i1 true, i1 true) nounwind
ret void
}
@@ -558,7 +558,7 @@ main_body:
; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ;
; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ;
; GCN: ; return to shader part epilog
-define amdgpu_ps <16 x float> @smrd_load_nonconst3(<4 x i32> inreg %rsrc, i32 %off) #0 {
+define amdgpu_ps <16 x float> @smrd_load_nonconst3(<4 x i32> inreg %rsrc, i32 %off) nounwind {
main_body:
%ld = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %off, i32 0)
%bc = bitcast <16 x i32> %ld to <16 x float>
@@ -576,7 +576,7 @@ main_body:
; VIGFX9_10-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 56 offen offset:4064 ;
; VIGFX9_10-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 56 offen offset:4080 ;
; GCN: ; return to shader part epilog
-define amdgpu_ps <16 x float> @smrd_load_nonconst4(<4 x i32> inreg %rsrc, i32 %off) #0 {
+define amdgpu_ps <16 x float> @smrd_load_nonconst4(<4 x i32> inreg %rsrc, i32 %off) nounwind {
main_body:
%off.2 = add i32 %off, 4088
%ld = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %off.2, i32 0)
@@ -596,7 +596,7 @@ main_body:
; VIGFX9_10-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], s4 offen offset:100 ;
; VIGFX9_10-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], s4 offen offset:116 ;
; GCN: ; return to shader part epilog
-define amdgpu_ps <16 x float> @smrd_load_nonconst5(<4 x i32> inreg %rsrc, i32 %off) #0 {
+define amdgpu_ps <16 x float> @smrd_load_nonconst5(<4 x i32> inreg %rsrc, i32 %off) nounwind {
main_body:
%off.2 = add i32 %off, 4100
%ld = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %off.2, i32 0)
@@ -609,14 +609,14 @@ main_body:
; SIVIGFX9_10: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; GCN: s_endpgm
-define amdgpu_ps void @smrd_load_dwordx2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 inreg %ncoff) #0 {
+define amdgpu_ps void @smrd_load_dwordx2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, ptr addrspace(4) inreg %in, i32 inreg %ncoff) nounwind {
main_body:
%tmp22 = load <4 x i32>, ptr addrspace(4) %in
%s.buffer = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %tmp22, i32 %ncoff, i32 0)
%s.buffer.float = bitcast <2 x i32> %s.buffer to <2 x float>
%r.1 = extractelement <2 x float> %s.buffer.float, i32 0
%r.2 = extractelement <2 x float> %s.buffer.float, i32 1
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r.1, float %r.1, float %r.1, float %r.2, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r.1, float %r.1, float %r.1, float %r.2, i1 true, i1 true) nounwind
ret void
}
@@ -625,7 +625,7 @@ main_body:
; TODO: we should keep the loop counter in an SGPR
;
; GCN: s_buffer_load_dword
-define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
+define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) nounwind {
main_body:
br label %loop
@@ -650,7 +650,7 @@ exit:
; TODO: we should keep the loop counter in an SGPR and use an S_BUFFER_LOAD
;
; GCN: buffer_load_dword
-define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) #0 {
+define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) nounwind {
main_body:
br label %loop
@@ -685,7 +685,7 @@ exit:
; GCN: buffer_load_dword v0, v0,
; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return to shader part epilog
-define amdgpu_cs float @arg_divergence(i32 inreg %unused, <3 x i32> %arg4) #0 {
+define amdgpu_cs float @arg_divergence(i32 inreg %unused, <3 x i32> %arg4) nounwind {
main_body:
br i1 undef, label %if1, label %endif1
@@ -739,11 +739,11 @@ define amdgpu_ps void @s_buffer_load_v16f32(<4 x i32> inreg %rsrc, i32 inreg %of
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone speculatable
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) #1
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) nounwind readnone
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32)
@@ -755,8 +755,4 @@ declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32)
declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32)
declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32)
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readnone speculatable }
-
!0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
index 6312816a40c247..a851d486406c2d 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
@@ -24,6 +24,4 @@ bb8: ; preds = %bb3, %bb
ret void
}
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index 355829825146df..370b7a418e9af9 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -9,13 +9,11 @@
--- |
- define amdgpu_kernel void @kernel() #0 {
+ define amdgpu_kernel void @kernel() "amdgpu-calls" "amdgpu-waves-per-eu"="10,10" "target-cpu"="gfx900" {
bb:
ret void
}
- attributes #0 = { "amdgpu-calls" "amdgpu-waves-per-eu"="10,10" "target-cpu"="gfx900" }
-
...
---
name: kernel
diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
index 3176257920a7a1..79e4e72d4d8620 100644
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
@@ -558,13 +558,10 @@ for.cond.cleanup26: ; preds = %for.cond28.preheade
ret void
}
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
-
-attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
-attributes #1 = { nounwind readnone speculatable willreturn }
+declare float @llvm.fmuladd.f32(float, float, float) nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable willreturn
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable willreturn
!0 = !{i32 1, i32 2, i32 1, i32 0}
!1 = !{!"none", !"none", !"none", !"none"}
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
index 8b166b4c1bf3ff..b48067594237c0 100644
--- a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
@@ -3,13 +3,13 @@
; Since this intrinsic is exposed as a constant after isel, use it to
; defeat the DAG's compare with constant canonicalizations.
-declare i32 @llvm.amdgcn.groupstaticsize() #1
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
@lds = addrspace(3) global [512 x i32] undef, align 4
; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 4{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_inline_imm(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_inline_imm(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -25,7 +25,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_simm16_max(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_max(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 32767
br i1 %cmp0, label %endif, label %if
@@ -41,7 +41,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -57,7 +57,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
-define amdgpu_kernel void @br_scc_ne_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ne i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -73,7 +73,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_simm16_min(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_min(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, -32768
br i1 %cmp0, label %endif, label %if
@@ -89,7 +89,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_simm16_min_m1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_simm16_min_m1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, -32769
br i1 %cmp0, label %endif, label %if
@@ -105,7 +105,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_uimm15_max(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm15_max(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 65535
br i1 %cmp0, label %endif, label %if
@@ -121,7 +121,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_uimm16_max(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm16_max(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 65535
br i1 %cmp0, label %endif, label %if
@@ -137,7 +137,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0x10000{{$}}
-define amdgpu_kernel void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 65536
br i1 %cmp0, label %endif, label %if
@@ -154,7 +154,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_eq_i32:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
-define amdgpu_kernel void @br_scc_eq_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -170,7 +170,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ne_i32:
; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
-define amdgpu_kernel void @br_scc_ne_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ne i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -186,7 +186,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
-define amdgpu_kernel void @br_scc_sgt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp sgt i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -202,7 +202,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
-define amdgpu_kernel void @br_scc_sgt_i32_simm16_max(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32_simm16_max(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp sgt i32 %cond, 32767
br i1 %cmp0, label %endif, label %if
@@ -218,7 +218,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
-define amdgpu_kernel void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp sgt i32 %cond, 32768
br i1 %cmp0, label %endif, label %if
@@ -234,7 +234,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sge_i32:
; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @br_scc_sge_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_sge_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sge i32 %cond, %size
@@ -251,7 +251,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_slt_i32:
; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
-define amdgpu_kernel void @br_scc_slt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_slt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp slt i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -267,7 +267,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_sle_i32:
; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @br_scc_sle_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_sle_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sle i32 %cond, %size
@@ -284,7 +284,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ugt_i32:
; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @br_scc_ugt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ugt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ugt i32 %cond, %size
@@ -301,7 +301,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_uge_i32:
; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @br_scc_uge_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_uge_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp uge i32 %cond, %size
@@ -318,7 +318,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32:
; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
-define amdgpu_kernel void @br_scc_ult_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ult i32 %cond, 65
br i1 %cmp0, label %endif, label %if
@@ -334,7 +334,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
; GCN: s_cmp_lt_u32 s2, 0xffff8000
-define amdgpu_kernel void @br_scc_ult_i32_min_simm16(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_min_simm16(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ult i32 %cond, -32768
br i1 %cmp0, label %endif, label %if
@@ -350,7 +350,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
-define amdgpu_kernel void @br_scc_ult_i32_min_simm16_m1(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_min_simm16_m1(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ult i32 %cond, -32769
br i1 %cmp0, label %endif, label %if
@@ -366,7 +366,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ule_i32:
; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @br_scc_ule_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ule_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ule i32 %cond, %size
@@ -383,7 +383,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_eq_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_eq_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp eq i32 %size, %cond
@@ -400,7 +400,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_ne_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ne_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ne i32 %size, %cond
@@ -417,7 +417,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_sgt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sgt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sgt i32 %size, %cond
@@ -434,7 +434,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_sge_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sge_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sge i32 %size, %cond
@@ -451,7 +451,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_slt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_slt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp slt i32 %size, %cond
@@ -468,7 +468,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_sle_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_sle_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp sle i32 %size, %cond
@@ -485,7 +485,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_ugt_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ugt_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ugt i32 %size, %cond
@@ -502,7 +502,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_uge_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_uge_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp uge i32 %size, %cond
@@ -519,7 +519,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_ult_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ult_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ult i32 %size, %cond
@@ -536,7 +536,7 @@ endif:
; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
-define amdgpu_kernel void @commute_br_scc_ule_i32(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @commute_br_scc_ule_i32(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%cmp0 = icmp ule i32 %size, %cond
@@ -553,7 +553,7 @@ endif:
; GCN-LABEL: {{^}}br_scc_ult_i32_non_u16:
; GCN: s_cmp_lt_u32 s2, 0xfffff7ff
-define amdgpu_kernel void @br_scc_ult_i32_non_u16(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ult_i32_non_u16(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
%size = call i32 @llvm.amdgcn.groupstaticsize()
%not.size = xor i32 %size, -1
@@ -573,7 +573,7 @@ endif:
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
; SI: v_cmp_eq_u64_e64
-define amdgpu_kernel void @br_scc_eq_i64_inline_imm(i64 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i64_inline_imm(i64 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i64 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -593,7 +593,7 @@ endif:
; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: v_cmp_eq_u64_e32
-define amdgpu_kernel void @br_scc_eq_i64_simm16(i64 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_eq_i64_simm16(i64 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp eq i64 %cond, 4294968530
br i1 %cmp0, label %endif, label %if
@@ -611,7 +611,7 @@ endif:
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 4
; SI: v_cmp_ne_u64_e64
-define amdgpu_kernel void @br_scc_ne_i64_inline_imm(i64 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i64_inline_imm(i64 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ne i64 %cond, 4
br i1 %cmp0, label %endif, label %if
@@ -631,7 +631,7 @@ endif:
; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]]
; SI: v_cmp_ne_u64_e32
-define amdgpu_kernel void @br_scc_ne_i64_simm16(i64 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @br_scc_ne_i64_simm16(i64 %cond, ptr addrspace(1) %out) nounwind {
entry:
%cmp0 = icmp ne i64 %cond, 4294968530
br i1 %cmp0, label %endif, label %if
@@ -644,6 +644,3 @@ endif:
store volatile i32 1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
index c9413b61758d14..53db8806253638 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
@@ -9,7 +9,7 @@
; GCN-NOT: buffer_load_dword
; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
; GCN: ScratchSize: 0
-define amdgpu_kernel void @max_12regs_13a_used(i32 %cond, ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
+define amdgpu_kernel void @max_12regs_13a_used(i32 %cond, ptr addrspace(1) %arg, ptr addrspace(1) %out) nounwind "amdgpu-num-vgpr"="12" {
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 1.0, <4 x float> %in.1, i32 0, i32 0, i32 0)
@@ -37,7 +37,7 @@ st:
; GCN-NOT: buffer_load_dword
; GCN: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
; GCN: ScratchSize: 0
-define amdgpu_kernel void @max_10_vgprs_used_9a() #1 {
+define amdgpu_kernel void @max_10_vgprs_used_9a() nounwind "amdgpu-num-vgpr"="10" {
%a1 = call <4 x i32> asm sideeffect "", "=a"()
%a2 = call <4 x i32> asm sideeffect "", "=a"()
%a3 = call i32 asm sideeffect "", "=a"()
@@ -56,7 +56,7 @@ define amdgpu_kernel void @max_10_vgprs_used_9a() #1 {
; GCN-NOT: buffer_load_dword
; GCN: v_accvgpr_write_b32
; GCN: ScratchSize: 0
-define amdgpu_kernel void @max_32regs_mfma32(ptr addrspace(1) %arg) #3 {
+define amdgpu_kernel void @max_32regs_mfma32(ptr addrspace(1) %arg) nounwind "amdgpu-num-vgpr"="32" {
bb:
%v = call i32 asm sideeffect "", "=a"()
br label %use
@@ -103,7 +103,7 @@ use:
; GFX90A: global_store_dwordx4 v[0:1], v[2:5], off
; GCN: ScratchSize: 20
-define amdgpu_kernel void @max_6regs_used_8a(ptr addrspace(1) %arg) #4 {
+define amdgpu_kernel void @max_6regs_used_8a(ptr addrspace(1) %arg) nounwind "amdgpu-num-vgpr"="6" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%v0 = call float asm sideeffect "; def $0", "=v"()
%a4 = call <4 x float> asm sideeffect "; def $0", "=a"()
@@ -120,8 +120,3 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-
-attributes #1 = { nounwind "amdgpu-num-vgpr"="10" }
-attributes #2 = { nounwind "amdgpu-num-vgpr"="12" }
-attributes #3 = { nounwind "amdgpu-num-vgpr"="32" }
-attributes #4 = { nounwind "amdgpu-num-vgpr"="6" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll b/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
index cc42077475cae4..db7c7490fe6dbc 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
@@ -22,7 +22,5 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
index c3b6d8d761f267..244558442e8465 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
@@ -15,7 +15,7 @@
define amdgpu_kernel void @spill_cfg_position(ptr addrspace(1) nocapture %arg) {
bb:
- %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp14 = load i32, ptr addrspace(1) %arg, align 4
%tmp15 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
%tmp16 = load i32, ptr addrspace(1) %tmp15, align 4
@@ -73,6 +73,4 @@ bb52: ; preds = %bb44, %bb36
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index b045dd559aac26..ba3889b4d9bfcf 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -21,10 +21,10 @@
; GCN: s_mov_b64 exec
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64
-define void @spill_csr_s5_copy() #0 {
+define void @spill_csr_s5_copy() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" {
bb:
%alloca = alloca i32, addrspace(5)
- %tmp = tail call i64 @func() #1
+ %tmp = tail call i64 @func() nounwind readnone
%tmp1 = getelementptr inbounds i32, ptr addrspace(1) null, i64 %tmp
%tmp2 = load i32, ptr addrspace(1) %tmp1, align 4
%tmp3 = zext i32 %tmp2 to i64
@@ -32,7 +32,4 @@ bb:
ret void
}
-declare i64 @func() #0
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-attributes #1 = { nounwind readnone }
+declare i64 @func() nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index b2235544686f1c..b606c1688b98d9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -32,18 +32,18 @@
; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
-define amdgpu_kernel void @spill_m0(i32 %cond, ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @spill_m0(i32 %cond, ptr addrspace(1) %out) nounwind {
entry:
- %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
+ %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() nounwind
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %endif
if:
- call void asm sideeffect "v_nop", ""() #0
+ call void asm sideeffect "v_nop", ""() nounwind
br label %endif
endif:
- %foo = call i32 asm sideeffect "s_add_i32 $0, $1, 1", "=s,{m0}"(i32 %m0) #0
+ %foo = call i32 asm sideeffect "s_add_i32 $0, $1, 1", "=s,{m0}"(i32 %m0) nounwind
store i32 %foo, ptr addrspace(1) %out
ret void
}
@@ -56,7 +56,7 @@ endif:
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
; GCN-NOT: s_buffer_load_dword m0
-define amdgpu_ps void @spill_kill_m0_lds(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %m0) #0 {
+define amdgpu_ps void @spill_kill_m0_lds(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %m0) nounwind {
main_body:
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0)
%cmp = fcmp ueq float 0.000000e+00, %tmp
@@ -74,7 +74,7 @@ else: ; preds = %main_body
endif: ; preds = %else, %if
%export = phi float [ %lds_data, %if ], [ %interp, %else ]
%tmp4 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %export, float %export)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp4, <2 x half> %tmp4, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp4, <2 x half> %tmp4, i1 true, i1 true) nounwind
ret void
}
@@ -105,11 +105,11 @@ endif: ; preds = %else, %if
; GCN-NOT: v_readlane_b32 m0
; GCN-NOT: s_buffer_store_dword m0
; GCN-NOT: s_buffer_load_dword m0
-define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 {
+define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) nounwind {
main_body:
- %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0
+ %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() nounwind
%tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg)
- call void asm sideeffect "; clobber $0", "~{m0}"() #0
+ call void asm sideeffect "; clobber $0", "~{m0}"() nounwind
%cmp = fcmp ueq float 0.000000e+00, %tmp
br i1 %cmp, label %if, label %else
@@ -168,28 +168,25 @@ endif:
; TOSMEM: s_dcache_wb
; TOSMEM: s_endpgm
define amdgpu_kernel void @restore_m0_lds(i32 %arg) {
- %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0
+ %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() nounwind
%sval = load volatile i64, ptr addrspace(4) undef
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %ret, label %bb
bb:
store volatile i64 %sval, ptr addrspace(3) undef
- call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0
+ call void asm sideeffect "; use $0", "{m0}"(i32 %m0) nounwind
br label %ret
ret:
ret void
}
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index baca66a287cbf2..d78ed96706de44 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -109,7 +109,7 @@ entry:
ret void
}
-define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
+define void @test_sgpr_offset_function_scavenge_fail_func() nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" {
; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_func:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -184,13 +184,13 @@ entry:
%asm6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 6
%asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() nounwind
; Force %a to spill with no free SGPRs
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
ret void
}
-define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
+define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" {
; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_kernel:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_add_u32 s0, s0, s7
@@ -266,7 +266,7 @@ entry:
%asm6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 6
%asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() nounwind
; Force %a to spill with no free SGPRs
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a)
ret void
@@ -633,8 +633,3 @@ entry:
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-num-sgpr"="17" "amdgpu-num-vgpr"="8" }
-attributes #2 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
-attributes #3 = { nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
index ed57caadea5c56..122d0a223f865a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -2,11 +2,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
--- |
- define amdgpu_kernel void @spill_regpressure_less() #0 {
+ define amdgpu_kernel void @spill_regpressure_less() "amdgpu-waves-per-eu"="8,8" {
ret void
}
-
- attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index bea2e6d4b45a3c..e9cad45d1f56b1 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -10066,7 +10066,7 @@ entry:
ret void
}
-define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %in) "amdgpu-waves-per-eu"="10,10" {
; GFX6-LABEL: test_limited_sgpr:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
@@ -11086,11 +11086,8 @@ ret:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-
-attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
; FLATSCR: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
index f5e94df415ae4f..7bd83d0f1817fa 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
@@ -5,7 +5,7 @@
; available to save exec. This scenario won't be true anymore as we reseve SGPR(s)
; upfront for saving exec.
-define amdgpu_kernel void @test() #1 {
+define amdgpu_kernel void @test() nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" {
; GFX10-LABEL: test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
@@ -29,13 +29,10 @@ define amdgpu_kernel void @test() #1 {
; GFX10-NEXT: ; use s[8:12]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
- %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0
- %wide.sgpr2 = call <5 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0
- call void asm sideeffect "", "~{v[0:7]}" () #0
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
- call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr2) #0
+ %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () nounwind
+ %wide.sgpr2 = call <5 x i32> asm sideeffect "; def $0", "={s[8:12]}" () nounwind
+ call void asm sideeffect "", "~{v[0:7]}" () nounwind
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) nounwind
+ call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr2) nounwind
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 537aca12bdb70e..c8344a2d1c8443 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -4,11 +4,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck --check-prefix=GFX11 %s
--- |
- define amdgpu_kernel void @check_vcc() #0 {
+ define amdgpu_kernel void @check_vcc() "frame-pointer"="all" {
ret void
}
-
- attributes #0 = { "frame-pointer"="all" }
...
---
name: check_vcc
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index d5f97314f9324c..33b17149d95a6c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -stop-after=greedy,1 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; Convert AV spills into VGPR spills by introducing appropriate copies in between.
-define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
+define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) nounwind "amdgpu-num-vgpr"="5" {
; GCN-LABEL: name: test_spill_av_class
; GCN: bb.0 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr4_sgpr5
@@ -28,5 +28,3 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
}
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index a9d687b78efa8c..f1282262a75828 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -14,7 +14,7 @@
; GFX908: ScratchSize: 0
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 10
-define amdgpu_kernel void @max_11_vgprs_used_9a(ptr addrspace(1) %p) #0 {
+define amdgpu_kernel void @max_11_vgprs_used_9a(ptr addrspace(1) %p) nounwind "amdgpu-num-vgpr"="11" {
%tid = load volatile i32, ptr addrspace(1) undef
call void asm sideeffect "", "a,a,a,a,a,a,a,a,a"(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
%p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
@@ -83,7 +83,7 @@ define amdgpu_kernel void @max_11_vgprs_used_9a(ptr addrspace(1) %p) #0 {
; GFX908: ScratchSize: 12
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 11
-define amdgpu_kernel void @max_11_vgprs_used_1a_partial_spill(ptr addrspace(1) %p) #0 {
+define amdgpu_kernel void @max_11_vgprs_used_1a_partial_spill(ptr addrspace(1) %p) nounwind "amdgpu-num-vgpr"="11" {
%tid = load volatile i32, ptr addrspace(1) undef
call void asm sideeffect "", "a"(i32 1)
%p1 = getelementptr inbounds i64, ptr addrspace(1) %p, i32 %tid
@@ -104,5 +104,3 @@ define amdgpu_kernel void @max_11_vgprs_used_1a_partial_spill(ptr addrspace(1) %
store volatile i64 %v5, ptr addrspace(1) %p1
ret void
}
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="11" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
index 9eacb88066c0f3..954f77fe03af11 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
@@ -17,7 +17,7 @@
; GCN: VGPRBlocks: 2
; GFX900: NumVGPRsForWavesPerEU: 11
; GFX908: NumVGPRsForWavesPerEU: 10
-define amdgpu_kernel void @max_11_vgprs(ptr addrspace(1) %p) #2 {
+define amdgpu_kernel void @max_11_vgprs(ptr addrspace(1) %p) nounwind "amdgpu-num-vgpr"="11" {
%tid = load volatile i32, ptr addrspace(1) undef
%p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
%p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
@@ -66,7 +66,7 @@ define amdgpu_kernel void @max_11_vgprs(ptr addrspace(1) %p) #2 {
; GFX908: ScratchSize: 68
; GFX908: VGPRBlocks: 2
; GFX908: NumVGPRsForWavesPerEU: 10
-define amdgpu_kernel void @max_10_vgprs_spill_v32(ptr addrspace(1) %p) #0 {
+define amdgpu_kernel void @max_10_vgprs_spill_v32(ptr addrspace(1) %p) nounwind "amdgpu-num-vgpr"="10" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
%v = load volatile <32 x float>, ptr addrspace(1) %gep
@@ -92,7 +92,7 @@ define amdgpu_kernel void @max_10_vgprs_spill_v32(ptr addrspace(1) %p) #0 {
; GCN908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
; GFX908: NumVGPRsForWavesPerEU: 252
-define amdgpu_kernel void @max_256_vgprs_spill_9x32(ptr addrspace(1) %p) #1 {
+define amdgpu_kernel void @max_256_vgprs_spill_9x32(ptr addrspace(1) %p) "amdgpu-flat-work-group-size"="1,256" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
%p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
@@ -143,7 +143,7 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(ptr addrspace(1) %p) #1 {
; GFX908: VGPRBlocks: 62
; GFX900: NumVGPRsForWavesPerEU: 256
; GFX908: NumVGPRsForWavesPerEU: 252
-define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(ptr addrspace(1) %p) #1 {
+define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(ptr addrspace(1) %p) "amdgpu-flat-work-group-size"="1,256" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
%p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
@@ -185,7 +185,7 @@ st:
; GFX908: v_accvgpr_write_b32
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
-define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, ptr addrspace(1) %p) #1 {
+define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, ptr addrspace(1) %p) "amdgpu-flat-work-group-size"="1,256" {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%p1 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p, i32 %tid
%p2 = getelementptr inbounds <32 x float>, ptr addrspace(1) %p1, i32 %tid
@@ -218,7 +218,3 @@ st:
declare i32 @llvm.amdgcn.workitem.id.x()
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
-attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
-attributes #2 = { nounwind "amdgpu-num-vgpr"="11" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
index dd6e9b9d02eef5..18a53952f2b79c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
@@ -15,13 +15,13 @@
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x2(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x2(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -44,13 +44,13 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x3(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x3(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -75,13 +75,13 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x4(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x4(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -108,13 +108,13 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x5(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x5(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -146,13 +146,13 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x8(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x8(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -200,13 +200,13 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x16(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x16(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) nounwind
br label %ret
ret:
@@ -286,17 +286,15 @@ ret:
; VMEM: s_cbranch_scc1
; VMEM: buffer_load_dword
-define amdgpu_kernel void @spill_sgpr_x32(ptr addrspace(1) %out, i32 %in) #0 {
- %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
+define amdgpu_kernel void @spill_sgpr_x32(ptr addrspace(1) %out, i32 %in) nounwind {
+ %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () nounwind
%cmp = icmp eq i32 %in, 0
br i1 %cmp, label %bb0, label %ret
bb0:
- call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
+ call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) nounwind
br label %ret
ret:
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
index 6d91c33fd28764..32d21f63bcd546 100644
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; Make sure dbg_value reports something for argument registers when they are split into multiple registers
-define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unnamed_addr #0 !dbg !7 {
+define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !7 {
; GCN-LABEL: split_v4f32_arg:
; GCN: .Lfunc_begin0:
; GCN-NEXT: .file 0
@@ -23,7 +23,7 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna
ret <4 x float> %arg, !dbg !20
}
-define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> %arg1) local_unnamed_addr #0 !dbg !21 {
+define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> %arg1) local_unnamed_addr norecurse nounwind readnone !dbg !21 {
; GCN-LABEL: split_v4f32_multi_arg:
; GCN: .Lfunc_begin1:
; GCN-NEXT: .loc 0 7 0 ; /tmp/dbg.cl:7:0
@@ -57,7 +57,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
ret <4 x float> %add, !dbg !34
}
-define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unnamed_addr #0 !dbg !35 {
+define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !35 {
; GCN-LABEL: split_v4f16_arg:
; GCN: .Lfunc_begin2:
; GCN-NEXT: .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0
@@ -75,7 +75,7 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname
ret <4 x half> %arg, !dbg !44
}
-define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0 !dbg !45 {
+define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !45 {
; GCN-LABEL: split_f64_arg:
; GCN: .Lfunc_begin3:
; GCN-NEXT: .loc 0 15 0 ; /tmp/dbg.cl:15:0
@@ -93,7 +93,7 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0
ret double %arg, !dbg !52
}
-define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_unnamed_addr #0 !dbg !53 {
+define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !53 {
; GCN-LABEL: split_v2f64_arg:
; GCN: .Lfunc_begin4:
; GCN-NEXT: .loc 0 19 0 ; /tmp/dbg.cl:19:0
@@ -113,7 +113,7 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un
ret <2 x double> %arg, !dbg !61
}
-define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg !62 {
+define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !62 {
; GCN-LABEL: split_i64_arg:
; GCN: .Lfunc_begin5:
; GCN-NEXT: .loc 0 23 0 ; /tmp/dbg.cl:23:0
@@ -131,7 +131,7 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg !
ret i64 %arg, !dbg !69
}
-define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned %arg) local_unnamed_addr #0 !dbg !70 {
+define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned %arg) local_unnamed_addr norecurse nounwind readnone !dbg !70 {
; GCN-LABEL: split_ptr_arg:
; GCN: .Lfunc_begin6:
; GCN-NEXT: .loc 0 27 0 ; /tmp/dbg.cl:27:0
@@ -149,10 +149,7 @@ define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned
ret ptr addrspace(1) %arg, !dbg !78
}
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
-attributes #0 = { norecurse nounwind readnone }
-attributes #1 = { nounwind readnone speculatable }
+declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/CodeGen/AMDGPU/split-smrd.ll b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
index b6087e1108b109..f0f282acee5914 100644
--- a/llvm/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
@@ -6,7 +6,7 @@
; GCN-LABEL: {{^}}split_smrd_add_worklist:
; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
-define amdgpu_ps void @split_smrd_add_worklist(ptr addrspace(4) inreg %arg) #0 {
+define amdgpu_ps void @split_smrd_add_worklist(ptr addrspace(4) inreg %arg) nounwind {
bb:
%tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 96, i32 0)
%tmp1 = bitcast float %tmp to i32
@@ -24,18 +24,14 @@ bb3: ; preds = %bb
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp10 = extractelement <4 x float> %tmp9, i32 0
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
- call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) nounwind
ret void
}
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
diff --git a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
index a5dcd9284edd0e..4f040efc25b719 100644
--- a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -29,7 +29,7 @@
; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24
; GCN: s_endpgm
-define amdgpu_kernel void @ds_reorder_vector_split(ptr addrspace(1) nocapture readonly %srcValues, ptr addrspace(1) nocapture readonly %offsets, ptr addrspace(1) nocapture %destBuffer, i32 %alignmentOffset, i32 %tmp, i32 %tmp1, i32 %x.i.12.i) #0 {
+define amdgpu_kernel void @ds_reorder_vector_split(ptr addrspace(1) nocapture readonly %srcValues, ptr addrspace(1) nocapture readonly %offsets, ptr addrspace(1) nocapture %destBuffer, i32 %alignmentOffset, i32 %tmp, i32 %tmp1, i32 %x.i.12.i) norecurse nounwind {
entry:
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
@@ -38,7 +38,7 @@ entry:
%tmp10 = add i32 %tmp3, %tmp6
%tmp11 = mul i32 %tmp10, %tmp1
%tmp9 = add i32 %tmp11, %tmp4
- %x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
+ %x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
%mul.26.i = mul i32 %x.i.12.i, %x.i.i
%add.i = add i32 %tmp2, %mul.26.i
%arrayidx = getelementptr [256 x [8 x <4 x i64>]], ptr addrspace(3) @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
@@ -74,10 +74,7 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.z() #1
-
-attributes #0 = { norecurse nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir
index dd3abf60078540..fbc9146d92ae29 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir
@@ -1,11 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -o - %s -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
--- |
- define amdgpu_kernel void @func0() #0 { ret void }
- define amdgpu_kernel void @func1() #0 { ret void }
- define amdgpu_kernel void @splitHoist() #0 { ret void }
-
- attributes #0 = { "amdgpu-num-sgpr"="12" }
+ define amdgpu_kernel void @func0() "amdgpu-num-sgpr"="12" { ret void }
+ define amdgpu_kernel void @func1() "amdgpu-num-sgpr"="12" { ret void }
+ define amdgpu_kernel void @splitHoist() "amdgpu-num-sgpr"="12" { ret void }
...
---
# Make sure we only get a single spill+reload even if liverange splitting
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index ae0221b8b32b33..45f73be3e2b88a 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -3,7 +3,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s -check-prefixes=EG
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @ashr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i32:
@@ -821,7 +821,7 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
; EG-NEXT: LSHR T1.X, PV.W, literal.x,
; EG-NEXT: ASHR * T0.Y, T0.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
%a = load i64, ptr addrspace(1) %gep.in
@@ -944,7 +944,7 @@ define amdgpu_kernel void @v_ashr_63_i64(ptr addrspace(1) %out, ptr addrspace(1)
; EG-NEXT: LSHR T1.X, PV.W, literal.x,
; EG-NEXT: MOV * T0.Y, PV.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
%a = load i64, ptr addrspace(1) %gep.in
@@ -952,5 +952,3 @@ define amdgpu_kernel void @v_ashr_63_i64(ptr addrspace(1) %out, ptr addrspace(1)
store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-any.ll b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-any.ll
index 331518c0c9d339..f1c1e4bdbf0e7e 100644
--- a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-any.ll
+++ b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-any.ll
@@ -6,8 +6,6 @@
; NOT-SUPPORTED: sramecc setting for subtarget: Unsupported
; ANY: sramecc setting for subtarget: Any
-define void @sramecc-subtarget-feature-default() #0 {
+define void @sramecc-subtarget-feature-default() nounwind {
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-disabled.ll b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-disabled.ll
index 1e4e9f3e13fe2b..8947ab37417c99 100644
--- a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-disabled.ll
+++ b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-disabled.ll
@@ -7,8 +7,6 @@
; WARN: warning: sramecc 'Off' was requested for a processor that does not support it!
; OFF: sramecc setting for subtarget: Off
-define void @sramecc-subtarget-feature-disabled() #0 {
+define void @sramecc-subtarget-feature-disabled() "target-features"="-sramecc" {
ret void
}
-
-attributes #0 = { "target-features"="-sramecc" }
diff --git a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-enabled.ll b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-enabled.ll
index 713b276ddedb3c..3a3e7f050ddb5d 100644
--- a/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-enabled.ll
+++ b/llvm/test/CodeGen/AMDGPU/sramecc-subtarget-feature-enabled.ll
@@ -6,8 +6,6 @@
; WARN: warning: sramecc 'On' was requested for a processor that does not support it!
; ON: sramecc setting for subtarget: On
-define void @sramecc-subtarget-feature-enabled() #0 {
+define void @sramecc-subtarget-feature-enabled() "target-features"="+sramecc" {
ret void
}
-
-attributes #0 = { "target-features"="+sramecc" }
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index 418c160d4244af..9e32f306e6426e 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck %s -check-prefixes=EG
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
define amdgpu_kernel void @lshr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_i32:
@@ -464,7 +464,7 @@ define amdgpu_kernel void @v_lshr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
%a = load i64, ptr addrspace(1) %gep.in
@@ -472,5 +472,3 @@ define amdgpu_kernel void @v_lshr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
index 71017f15e3c6d1..baed10132c314c 100644
--- a/llvm/test/CodeGen/AMDGPU/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
@@ -1097,17 +1097,15 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
ret i64 %result
}
-declare i8 @llvm.ssub.sat.i8(i8, i8) #0
-declare i16 @llvm.ssub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.ssub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.ssub.sat.i64(i64, i64) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i8 @llvm.ssub.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare i16 @llvm.ssub.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare i32 @llvm.ssub.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
+declare i64 @llvm.ssub.sat.i64(i64, i64) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index 9ad9fa03048655..5c3e360ef8ea55 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -155,6 +155,4 @@ shader_eval_surface.exit: ; preds = %entry
ret void
}
-declare hidden i32 @svm_eval_nodes(ptr addrspace(5), ptr addrspace(5), ptr addrspace(5), i32, i32) local_unnamed_addr #0
-
-attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+declare hidden i32 @svm_eval_nodes(ptr addrspace(5), ptr addrspace(5), ptr addrspace(5), i32, i32) local_unnamed_addr nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
index e378a83cff50d1..bd48518be97c8b 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
@@ -1,11 +1,11 @@
; RUN: not llc -mtriple=amdgcn-- < %s 2>&1 | FileCheck -check-prefix=ERROR %s
; RUN: not llc -mtriple=amdgcn-- < %s | FileCheck -check-prefix=GCN %s
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i32, i1) #1
+declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i32, i1) "target-cpu" = "gfx1010"
; ERROR: error: <unknown>:0:0: stack frame size (131064) exceeds limit (131056) in function 'stack_size_limit_wave64'
; GCN: ; ScratchSize: 131064
-define amdgpu_kernel void @stack_size_limit_wave64() #0 {
+define amdgpu_kernel void @stack_size_limit_wave64() "target-cpu" = "gfx900" {
entry:
%alloca = alloca [131057 x i8], align 1, addrspace(5)
call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 131057, i32 1, i1 true)
@@ -14,7 +14,7 @@ entry:
; ERROR: error: <unknown>:0:0: stack frame size (262120) exceeds limit (262112) in function 'stack_size_limit_wave32'
; GCN: ; ScratchSize: 262120
-define amdgpu_kernel void @stack_size_limit_wave32() #1 {
+define amdgpu_kernel void @stack_size_limit_wave32() "target-cpu" = "gfx1010" {
entry:
%alloca = alloca [262113 x i8], align 1, addrspace(5)
call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 262113, i32 1, i1 true)
@@ -23,7 +23,7 @@ entry:
; ERROR-NOT: error:
; GCN: ; ScratchSize: 131056
-define amdgpu_kernel void @max_stack_size_wave64() #0 {
+define amdgpu_kernel void @max_stack_size_wave64() "target-cpu" = "gfx900" {
entry:
%alloca = alloca [131052 x i8], align 1, addrspace(5)
call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 131052, i32 1, i1 true)
@@ -32,12 +32,9 @@ entry:
; ERROR-NOT: error:
; GCN: ; ScratchSize: 262112
-define amdgpu_kernel void @max_stack_size_wave32() #1 {
+define amdgpu_kernel void @max_stack_size_wave32() "target-cpu" = "gfx1010" {
entry:
%alloca = alloca [262108 x i8], align 1, addrspace(5)
call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 9, i32 262108, i32 1, i1 true)
ret void
}
-
-attributes #0 = { "target-cpu" = "gfx900" }
-attributes #1 = { "target-cpu" = "gfx1010" }
diff --git a/llvm/test/CodeGen/AMDGPU/store-barrier.ll b/llvm/test/CodeGen/AMDGPU/store-barrier.ll
index af48d7eca30f8b..fa6995b46e372b 100644
--- a/llvm/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-barrier.ll
@@ -12,7 +12,7 @@
; CHECK: s_barrier
; CHECK: s_endpgm
; Function Attrs: nounwind
-define amdgpu_kernel void @test(ptr addrspace(3) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1, ptr addrspace(1) nocapture readonly %arg2, ptr addrspace(1) nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
+define amdgpu_kernel void @test(ptr addrspace(3) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1, ptr addrspace(1) nocapture readonly %arg2, ptr addrspace(1) nocapture %arg3, i32 %arg4, i64 %tmp9) nounwind {
bb:
%tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg2, i64 %tmp9
%tmp13 = load i32, ptr addrspace(1) %tmp10, align 2
@@ -37,7 +37,4 @@ bb:
}
; Function Attrs: convergent nounwind
-declare void @llvm.amdgcn.s.barrier() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/store-global.ll b/llvm/test/CodeGen/AMDGPU/store-global.ll
index f068b1481aa9ab..b3e85d8bdc0d45 100644
--- a/llvm/test/CodeGen/AMDGPU/store-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-global.ll
@@ -371,7 +371,7 @@ entry:
; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
-define amdgpu_kernel void @vecload2(ptr addrspace(1) nocapture %out, ptr addrspace(4) nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(ptr addrspace(1) nocapture %out, ptr addrspace(4) nocapture %mem) nounwind {
entry:
%0 = load i32, ptr addrspace(4) %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, ptr addrspace(4) %mem, i64 1
@@ -402,5 +402,3 @@ entry:
store i32 2, ptr addrspace(1) %arrayidx6, align 4
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
index 470873f6f74976..7d59a8e275374c 100644
--- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
@@ -13,7 +13,7 @@
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -32,7 +32,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2f16(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2f16(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x half>
@@ -51,7 +51,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_i32_shift(ptr addrspace(1) %out, i32 %value) #0 {
+define void @store_global_hi_i32_shift(ptr addrspace(1) %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
@@ -69,7 +69,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16_i8(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16_i8(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -88,7 +88,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_i8_shift(ptr addrspace(1) %out, i32 %value) #0 {
+define void @store_global_hi_i8_shift(ptr addrspace(1) %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
@@ -107,7 +107,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16_max_offset(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16_max_offset(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -128,7 +128,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16_min_offset(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16_min_offset(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -148,7 +148,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16_i8_max_offset(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16_i8_max_offset(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -169,7 +169,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_global_hi_v2i16_i8_min_offset(ptr addrspace(1) %out, i32 %arg) #0 {
+define void @store_global_hi_v2i16_i8_min_offset(ptr addrspace(1) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -189,7 +189,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -207,7 +207,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2f16(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2f16(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x half>
%hi = extractelement <2 x half> %value, i32 1
@@ -225,7 +225,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i32_shift(ptr %out, i32 %value) #0 {
+define void @store_flat_hi_i32_shift(ptr %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
@@ -243,7 +243,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -262,7 +262,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_i8_shift(ptr %out, i32 %value) #0 {
+define void @store_flat_hi_i8_shift(ptr %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
@@ -281,7 +281,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_max_offset(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_max_offset(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -302,7 +302,7 @@ entry:
; GFX803: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_neg_offset(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_neg_offset(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -322,7 +322,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_max_offset(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_max_offset(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -348,7 +348,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_flat_hi_v2i16_i8_neg_offset(ptr %out, i32 %arg) #0 {
+define void @store_flat_hi_v2i16_i8_neg_offset(ptr %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -369,7 +369,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16(ptr addrspace(5) %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16(ptr addrspace(5) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -389,7 +389,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2f16(ptr addrspace(5) %out, i32 %arg) #0 {
+define void @store_private_hi_v2f16(ptr addrspace(5) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x half>
@@ -409,7 +409,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i32_shift(ptr addrspace(5) %out, i32 %value) #0 {
+define void @store_private_hi_i32_shift(ptr addrspace(5) %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
@@ -428,7 +428,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_i8(ptr addrspace(5) %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_i8(ptr addrspace(5) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -448,7 +448,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_i8_shift(ptr addrspace(5) %out, i32 %value) #0 {
+define void @store_private_hi_i8_shift(ptr addrspace(5) %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i8
@@ -466,7 +466,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_max_offset(ptr addrspace(5) byval(i16) %out, i32 %arg) #0 {
+define void @store_private_hi_v2i16_max_offset(ptr addrspace(5) byval(i16) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -489,7 +489,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
+define void @store_private_hi_v2i16_nooff(i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -511,7 +511,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
+define void @store_private_hi_v2i16_i8_nooff(i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -530,7 +530,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_local_hi_v2i16(ptr addrspace(3) %out, i32 %arg) #0 {
+define void @store_local_hi_v2i16(ptr addrspace(3) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -549,7 +549,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_local_hi_v2f16(ptr addrspace(3) %out, i32 %arg) #0 {
+define void @store_local_hi_v2f16(ptr addrspace(3) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x half>
@@ -568,7 +568,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_local_hi_i32_shift(ptr addrspace(3) %out, i32 %value) #0 {
+define void @store_local_hi_i32_shift(ptr addrspace(3) %out, i32 %value) nounwind {
entry:
%hi32 = lshr i32 %value, 16
%hi = trunc i32 %hi32 to i16
@@ -586,7 +586,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_local_hi_v2i16_i8(ptr addrspace(3) %out, i32 %arg) #0 {
+define void @store_local_hi_v2i16_i8(ptr addrspace(3) %out, i32 %arg) nounwind {
entry:
%value = bitcast i32 %arg to <2 x i16>
%hi = extractelement <2 x i16> %value, i32 1
@@ -604,7 +604,7 @@ entry:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
-define void @store_local_hi_v2i16_max_offset(ptr addrspace(3) %out, i32 %arg) #0 {
+define void @store_local_hi_v2i16_max_offset(ptr addrspace(3) %out, i32 %arg) nounwind {
entry:
; FIXME: ABI for pre-gfx9
%value = bitcast i32 %arg to <2 x i16>
@@ -624,7 +624,7 @@ entry:
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4058
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @store_private_hi_v2i16_to_offset(i32 %arg, ptr addrspace(5) %obj0) #0 {
+define void @store_private_hi_v2i16_to_offset(i32 %arg, ptr addrspace(5) %obj0) nounwind {
entry:
%obj1 = alloca [4096 x i16], align 2, addrspace(5)
store volatile i32 123, ptr addrspace(5) %obj0
@@ -644,7 +644,7 @@ entry:
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4059
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @store_private_hi_v2i16_i8_to_offset(i32 %arg, ptr addrspace(5) %obj0) #0 {
+define void @store_private_hi_v2i16_i8_to_offset(i32 %arg, ptr addrspace(5) %obj0) nounwind {
entry:
%obj1 = alloca [4096 x i8], align 2, addrspace(5)
store volatile i32 123, ptr addrspace(5) %obj0
@@ -655,5 +655,3 @@ entry:
store i8 %trunc, ptr addrspace(5) %gep
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/store-private.ll b/llvm/test/CodeGen/AMDGPU/store-private.ll
index 1c4ac88c9ed398..055b478b16e8d5 100644
--- a/llvm/test/CodeGen/AMDGPU/store-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-private.ll
@@ -691,7 +691,7 @@ entry:
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @vecload2(ptr addrspace(5) nocapture %out, ptr addrspace(4) nocapture %mem) #0 {
+define amdgpu_kernel void @vecload2(ptr addrspace(5) nocapture %out, ptr addrspace(4) nocapture %mem) nounwind {
entry:
%0 = load i32, ptr addrspace(4) %mem, align 4
%arrayidx1.i = getelementptr inbounds i32, ptr addrspace(4) %mem, i64 1
@@ -740,6 +740,3 @@ entry:
store i32 2, ptr addrspace(5) %arrayidx6, align 4
ret void
}
-
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
index f88aaf389ca9ae..daa013530eddfc 100644
--- a/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define void @local_store_i56(ptr addrspace(3) %ptr, i56 %arg) #0 {
+define void @local_store_i56(ptr addrspace(3) %ptr, i56 %arg) nounwind {
; CIVI-LABEL: local_store_i56:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -47,7 +47,7 @@ define void @local_store_i56(ptr addrspace(3) %ptr, i56 %arg) #0 {
ret void
}
-define amdgpu_kernel void @local_store_i55(ptr addrspace(3) %ptr, i55 %arg) #0 {
+define amdgpu_kernel void @local_store_i55(ptr addrspace(3) %ptr, i55 %arg) nounwind {
; HAWAII-LABEL: local_store_i55:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_or_b32 s0, s4, 14
@@ -153,7 +153,7 @@ define amdgpu_kernel void @local_store_i55(ptr addrspace(3) %ptr, i55 %arg) #0 {
ret void
}
-define amdgpu_kernel void @local_store_i48(ptr addrspace(3) %ptr, i48 %arg) #0 {
+define amdgpu_kernel void @local_store_i48(ptr addrspace(3) %ptr, i48 %arg) nounwind {
; HAWAII-LABEL: local_store_i48:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x0
@@ -220,7 +220,7 @@ define amdgpu_kernel void @local_store_i48(ptr addrspace(3) %ptr, i48 %arg) #0 {
ret void
}
-define amdgpu_kernel void @local_store_i65(ptr addrspace(3) %ptr, i65 %arg) #0 {
+define amdgpu_kernel void @local_store_i65(ptr addrspace(3) %ptr, i65 %arg) nounwind {
; HAWAII-LABEL: local_store_i65:
; HAWAII: ; %bb.0:
; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x4
@@ -302,7 +302,7 @@ define amdgpu_kernel void @local_store_i65(ptr addrspace(3) %ptr, i65 %arg) #0 {
ret void
}
-define void @local_store_i13(ptr addrspace(3) %ptr, i13 %arg) #0 {
+define void @local_store_i13(ptr addrspace(3) %ptr, i13 %arg) nounwind {
; CIVI-LABEL: local_store_i13:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -339,7 +339,7 @@ define void @local_store_i13(ptr addrspace(3) %ptr, i13 %arg) #0 {
ret void
}
-define void @local_store_i17(ptr addrspace(3) %ptr, i17 %arg) #0 {
+define void @local_store_i17(ptr addrspace(3) %ptr, i17 %arg) nounwind {
; CIVI-LABEL: local_store_i17:
; CIVI: ; %bb.0:
; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -379,5 +379,3 @@ define void @local_store_i17(ptr addrspace(3) %ptr, i17 %arg) #0 {
store i17 %arg, ptr addrspace(3) %ptr, align 8
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index f3be926f76befd..e537f3f55b4531 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; FIXME: promotion not handled without f16 insts
-define half @v_constained_fadd_f16_fpexcept_strict(half %x, half %y) #0 {
+define half @v_constained_fadd_f16_fpexcept_strict(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fadd_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -21,7 +21,7 @@ define half @v_constained_fadd_f16_fpexcept_strict(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fadd_f16_fpexcept_ignore(half %x, half %y) #0 {
+define half @v_constained_fadd_f16_fpexcept_ignore(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fadd_f16_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -37,7 +37,7 @@ define half @v_constained_fadd_f16_fpexcept_ignore(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fadd_f16_fpexcept_maytrap(half %x, half %y) #0 {
+define half @v_constained_fadd_f16_fpexcept_maytrap(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -53,7 +53,7 @@ define half @v_constained_fadd_f16_fpexcept_maytrap(half %x, half %y) #0 {
ret half %val
}
-define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fadd_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -77,7 +77,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fadd_v2f16_fpexcept_ignore:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -101,7 +101,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
ret <2 x half> %val
}
-define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
+define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -153,7 +153,7 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
}
; FIXME: Scalarized
-define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
+define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -206,7 +206,7 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
ret <4 x half> %val
}
-define amdgpu_ps half @s_constained_fadd_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
+define amdgpu_ps half @s_constained_fadd_f16_fpexcept_strict(half inreg %x, half inreg %y) strictfp {
; GCN-LABEL: s_constained_fadd_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -221,7 +221,7 @@ define amdgpu_ps half @s_constained_fadd_f16_fpexcept_strict(half inreg %x, half
ret half %val
}
-define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
+define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) strictfp {
; GFX9-LABEL: s_constained_fadd_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s3
@@ -248,10 +248,7 @@ define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half>
ret <2 x half> %val
}
-declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fadd.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fadd.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half>, <2 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x half> @llvm.experimental.constrained.fadd.v3f16(<3 x half>, <3 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <4 x half> @llvm.experimental.constrained.fadd.v4f16(<4 x half>, <4 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
index 2aecf5fd8753c9..abbb0727841676 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -19,7 +19,7 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -35,7 +35,7 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -51,7 +51,7 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
ret float %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -75,7 +75,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -99,7 +99,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -123,7 +123,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2
ret <2 x float> %val
}
-define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
+define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -150,7 +150,7 @@ define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x
ret <3 x float> %val
}
-define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
+define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) strictfp {
; GCN-LABEL: s_constained_fadd_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -165,7 +165,7 @@ define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, fl
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -177,12 +177,12 @@ define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_add_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%val = call float @llvm.experimental.constrained.fadd.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -194,12 +194,12 @@ define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_add_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.y = call float @llvm.fabs.f32(float %y) #0
+ %fabs.y = call float @llvm.fabs.f32(float %y) strictfp
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -211,7 +211,7 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
@@ -221,5 +221,3 @@ declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
index faa0131c88c2d2..7e81b0d0825a45 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
+define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -19,7 +19,7 @@ define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
+define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -35,7 +35,7 @@ define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
+define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -51,7 +51,7 @@ define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
ret double %val
}
-define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,7 +69,7 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -87,7 +87,7 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -105,7 +105,7 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <
ret <2 x double> %val
}
-define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
+define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) strictfp {
; GCN-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@ define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3
ret <3 x double> %val
}
-define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
+define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg %x, double inreg %y) strictfp {
; GCN-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -142,9 +142,6 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
ret <2 x float> %cast
}
-declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
index 407bb002483ec8..907393228a2009 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11 %s
-define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -26,7 +26,7 @@ define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0
ret half %val
}
-define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
+define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -60,7 +60,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha
ret <2 x half> %val
}
-define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 {
+define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -98,7 +98,7 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
ret <3 x half> %val
}
-define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 {
+define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -173,7 +173,7 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
ret <4 x half> %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -196,7 +196,7 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z
ret half %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -220,7 +220,7 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, ha
ret half %val
}
-define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) #0 {
+define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) strictfp {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -238,13 +238,13 @@ define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, ha
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call half @llvm.fabs.f16(half %x) #0
- %neg.y = call half @llvm.fabs.f16(half %y) #0
+ %neg.x = call half @llvm.fabs.f16(half %x) strictfp
+ %neg.y = call half @llvm.fabs.f16(half %y) strictfp
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret half %val
}
-define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
+define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) strictfp {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -285,5 +285,3 @@ declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata,
declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
index 922a4dcbea351e..7d7cc2dd4e5a30 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -19,7 +19,7 @@ define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z)
ret float %val
}
-define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
+define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -37,7 +37,7 @@ define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
+define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -57,7 +57,7 @@ define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x
ret <3 x float> %val
}
-define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
+define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,7 +79,7 @@ define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x
ret <4 x float> %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -96,7 +96,7 @@ define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, floa
ret float %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -114,7 +114,7 @@ define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y,
ret float %val
}
-define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
+define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) strictfp {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,13 +126,13 @@ define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y,
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call float @llvm.fabs.f32(float %x) #0
- %neg.y = call float @llvm.fabs.f32(float %y) #0
+ %neg.x = call float @llvm.fabs.f32(float %x) strictfp
+ %neg.y = call float @llvm.fabs.f32(float %y) strictfp
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
+define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -157,5 +157,3 @@ declare float @llvm.experimental.constrained.fma.f32(float, float, float, metada
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
index 5cb293ab25806a..2808a7aaab73d2 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -19,7 +19,7 @@ define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double
ret double %val
}
-define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -37,7 +37,7 @@ define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2
ret <2 x double> %val
}
-define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) #0 {
+define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v3f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -57,7 +57,7 @@ define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3
ret <3 x double> %val
}
-define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) #0 {
+define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v4f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,7 +79,7 @@ define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4
ret <4 x double> %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -96,7 +96,7 @@ define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, d
ret double %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -114,7 +114,7 @@ define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double
ret double %val
}
-define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) #0 {
+define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) strictfp {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,13 +126,13 @@ define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
- %neg.x = call double @llvm.fabs.f64(double %x) #0
- %neg.y = call double @llvm.fabs.f64(double %y) #0
+ %neg.x = call double @llvm.fabs.f64(double %x) strictfp
+ %neg.y = call double @llvm.fabs.f64(double %y) strictfp
%val = call double @llvm.experimental.constrained.fma.f64(double %neg.x, double %neg.y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret double %val
}
-define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
+define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) strictfp {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -157,5 +157,3 @@ declare double @llvm.experimental.constrained.fma.f64(double, double, double, me
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index d798166a678396..1d94f77a003ff2 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -14,7 +14,7 @@
; FIXME: promotion not handled without f16 insts
-define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 {
+define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -30,7 +30,7 @@ define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 {
+define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -46,7 +46,7 @@ define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 {
+define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -62,7 +62,7 @@ define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 {
ret half %val
}
-define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +94,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,7 +126,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -158,7 +158,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
ret <2 x half> %val
}
-define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
+define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -223,7 +223,7 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
}
; FIXME: Scalarized
-define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
+define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -308,7 +308,7 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
ret <4 x half> %val
}
-define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
+define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half inreg %y) strictfp {
; GCN-LABEL: s_constained_fmul_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -323,7 +323,7 @@ define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half
ret half %val
}
-define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
+define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) strictfp {
; GFX9-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s3
@@ -362,13 +362,10 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
ret <2 x half> %val
}
-declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half>, <2 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half>, <3 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
index 8df2834928395c..5c02ffe65ec78d 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
@@ -6,7 +6,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@ define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,7 +38,7 @@ define float @v_constained_fmul_f32_fpexcept_ignore(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -54,7 +54,7 @@ define float @v_constained_fmul_f32_fpexcept_maytrap(float %x, float %y) #0 {
ret float %val
}
-define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -78,7 +78,7 @@ define <2 x float> @v_constained_fmul_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -102,7 +102,7 @@ define <2 x float> @v_constained_fmul_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,7 +126,7 @@ define <2 x float> @v_constained_fmul_v2f32_fpexcept_maytrap(<2 x float> %x, <2
ret <2 x float> %val
}
-define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
+define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -153,7 +153,7 @@ define <3 x float> @v_constained_fmul_v3f32_fpexcept_strict(<3 x float> %x, <3 x
ret <3 x float> %val
}
-define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
+define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, float inreg %y) strictfp {
; GCN-LABEL: s_constained_fmul_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -168,7 +168,7 @@ define amdgpu_ps float @s_constained_fmul_f32_fpexcept_strict(float inreg %x, fl
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -180,12 +180,12 @@ define float @v_constained_fmul_f32_fpexcept_strict_fabs_lhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%val = call float @llvm.experimental.constrained.fmul.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fabs_rhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -197,12 +197,12 @@ define float @v_constained_fmul_f32_fpexcept_strict_fabs_rhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.y = call float @llvm.fabs.f32(float %y) #0
+ %fabs.y = call float @llvm.fabs.f32(float %y) strictfp
%val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -214,7 +214,7 @@ define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_mul_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fmul.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
@@ -224,5 +224,3 @@ declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
index 8c98a662c59cc3..8bfea9a6244be4 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
@@ -6,7 +6,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) #0 {
+define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fmul_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@ define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fmul_f64_fpexcept_ignore(double %x, double %y) #0 {
+define double @v_constained_fmul_f64_fpexcept_ignore(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fmul_f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,7 +38,7 @@ define double @v_constained_fmul_f64_fpexcept_ignore(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fmul_f64_fpexcept_maytrap(double %x, double %y) #0 {
+define double @v_constained_fmul_f64_fpexcept_maytrap(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fmul_f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -54,7 +54,7 @@ define double @v_constained_fmul_f64_fpexcept_maytrap(double %x, double %y) #0 {
ret double %val
}
-define <2 x double> @v_constained_fmul_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fmul_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -72,7 +72,7 @@ define <2 x double> @v_constained_fmul_v2f64_fpexcept_strict(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fmul_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fmul_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -90,7 +90,7 @@ define <2 x double> @v_constained_fmul_v2f64_fpexcept_ignore(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fmul_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fmul_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v2f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -108,7 +108,7 @@ define <2 x double> @v_constained_fmul_v2f64_fpexcept_maytrap(<2 x double> %x, <
ret <2 x double> %val
}
-define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
+define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) strictfp {
; GCN-LABEL: v_constained_fmul_v3f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -128,7 +128,7 @@ define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3
ret <3 x double> %val
}
-define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
+define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg %x, double inreg %y) strictfp {
; GCN-LABEL: s_constained_fmul_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -145,9 +145,6 @@ define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg
ret <2 x float> %cast
}
-declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
index df6c1a0aa8c465..6c152b3e6db280 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
-define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
+define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) strictfp {
; SI-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +29,7 @@ define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
ret float %result
}
-define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
+define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) strictfp {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -68,7 +68,7 @@ define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half
ret <2 x float> %result
}
-define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
+define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) strictfp {
; SI-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -116,7 +116,7 @@ define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half
ret <3 x float> %result
}
-define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
+define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) strictfp {
; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -126,7 +126,7 @@ define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
ret double %result
}
-define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
+define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) strictfp {
; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -138,7 +138,7 @@ define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x flo
ret <2 x double> %result
}
-define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
+define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) strictfp {
; SI-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -172,7 +172,7 @@ define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x flo
ret <3 x double> %result
}
-define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
+define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) strictfp {
; SI-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,7 +199,7 @@ define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
ret double %result
}
-define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
+define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) strictfp {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -244,7 +244,7 @@ define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x hal
ret <2 x double> %result
}
-define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
+define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) strictfp {
; SI-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -299,7 +299,7 @@ define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x hal
ret <3 x double> %result
}
-define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
+define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) strictfp {
; SI-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -327,7 +327,7 @@ define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0
ret float %neg.result
}
-define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
+define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) strictfp {
; SI-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -353,7 +353,7 @@ define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0
ret float %result
}
-define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
+define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) strictfp {
; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -364,7 +364,7 @@ define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #
ret double %result
}
-define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
+define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) strictfp {
; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -376,7 +376,7 @@ define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #
ret double %neg.result
}
-define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) #0 {
+define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -425,7 +425,7 @@ define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) #0 {
ret float %result
}
-define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %ptr) #0 {
+define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -481,17 +481,14 @@ define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %p
ret <2 x float> %result
}
-declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
index 0f80327638a9cb..f8aa6b9d204aaf 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
-define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
+define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) strictfp {
; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +29,7 @@ define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
ret half %val
}
-define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x float> %arg) #0 {
+define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x float> %arg) strictfp {
; SI-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -69,7 +69,7 @@ define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x flo
ret <2 x half> %val
}
-define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x float> %arg) #0 {
+define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x float> %arg) strictfp {
; SI-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -115,7 +115,7 @@ define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x flo
ret <3 x half> %val
}
-define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
+define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) strictfp {
; GCN-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -125,7 +125,7 @@ define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
ret float %val
}
-define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x double> %arg) #0 {
+define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x double> %arg) strictfp {
; GCN-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -136,7 +136,7 @@ define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x do
ret <2 x float> %val
}
-define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x double> %arg) #0 {
+define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x double> %arg) strictfp {
; GCN-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -149,22 +149,22 @@ define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x do
}
; FIXME:
-; define half @v_constrained_fptrunc_f64_to_f16_fpexcept_strict(double %arg) #0 {
+; define half @v_constrained_fptrunc_f64_to_f16_fpexcept_strict(double %arg) strictfp {
; %val = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; ret half %val
; }
-; define <2 x half> @v_constrained_fptrunc_v2f64_to_v2f16_fpexcept_strict(<2 x double> %arg) #0 {
+; define <2 x half> @v_constrained_fptrunc_v2f64_to_v2f16_fpexcept_strict(<2 x double> %arg) strictfp {
; %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; ret <2 x half> %val
; }
-; define <3 x half> @v_constrained_fptrunc_v3f64_to_v3f16_fpexcept_strict(<3 x double> %arg) #0 {
+; define <3 x half> @v_constrained_fptrunc_v3f64_to_v3f16_fpexcept_strict(<3 x double> %arg) strictfp {
; %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; ret <3 x half> %val
; }
-define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
+define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) strictfp {
; SI-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -192,7 +192,7 @@ define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #
ret half %neg.val
}
-define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #0 {
+define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) strictfp {
; SI-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -217,7 +217,7 @@ define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #
ret half %val
}
-define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
+define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg) strictfp {
; GCN-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -229,7 +229,7 @@ define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg)
ret float %neg.val
}
-define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg) #0 {
+define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg) strictfp {
; GCN-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -240,7 +240,7 @@ define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg)
ret float %val
}
-define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg, ptr addrspace(1) %ptr) #0 {
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg, ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -264,7 +264,7 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg,
ret void
}
-define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x float> %arg, ptr addrspace(1) %ptr) #0 {
+define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x float> %arg, ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -329,7 +329,7 @@ define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x flo
ret void
}
-define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %arg, ptr addrspace(1) %ptr) #0 {
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %arg, ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -354,7 +354,7 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %
ret void
}
-define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %arg, ptr addrspace(1) %ptr) #0 {
+define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %arg, ptr addrspace(1) %ptr) strictfp {
; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -374,7 +374,7 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
; GFX1011-NEXT: s_setpc_b64 s[30:31]
- %abs.arg = call float @llvm.fabs.f32(float %arg) #0
+ %abs.arg = call float @llvm.fabs.f32(float %arg) strictfp
%result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret void
}
@@ -392,5 +392,3 @@ declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x doubl
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
declare float @llvm.fabs.f32(float)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 3420596da2aacb..d92394fea07685 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -13,7 +13,7 @@
; FIXME: promotion not handled without f16 insts
-define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) #0 {
+define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +29,7 @@ define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) #0 {
+define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -45,7 +45,7 @@ define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) #0 {
ret half %val
}
-define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) #0 {
+define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) strictfp {
; GCN-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -61,7 +61,7 @@ define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) #0 {
ret half %val
}
-define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -127,7 +127,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,7 +193,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
ret <2 x half> %val
}
-define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
+define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -259,7 +259,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
ret <2 x half> %val
}
-define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
+define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -343,7 +343,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
}
; FIXME: Scalarized
-define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
+define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) strictfp {
; GFX9-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -448,7 +448,7 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
ret <4 x half> %val
}
-define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
+define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half inreg %y) strictfp {
; GCN-LABEL: s_constained_fsub_f16_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -463,7 +463,7 @@ define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half
ret half %val
}
-define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
+define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) strictfp {
; GFX9-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_lshr_b32 s0, s3, 16
@@ -542,13 +542,10 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
ret <2 x half> %val
}
-declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half>, <2 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half>, <3 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half>, <4 x half>, metadata, metadata) inaccessiblememonly nounwind willreturn
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX8: {{.*}}
; GFX9: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
index 23dbe21379f7ff..6a99662a6b685a 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
@@ -8,7 +8,7 @@
; RUN: llc -global-isel= -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
-define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -24,7 +24,7 @@ define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,7 +40,7 @@ define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
ret float %val
}
-define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -56,7 +56,7 @@ define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
ret float %val
}
-define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,7 +80,7 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -104,7 +104,7 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
ret <2 x float> %val
}
-define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
+define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -128,7 +128,7 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2
ret <2 x float> %val
}
-define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
+define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -155,7 +155,7 @@ define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x
ret <3 x float> %val
}
-define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
+define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) strictfp {
; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s3
@@ -170,7 +170,7 @@ define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, fl
ret float %val
}
-define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -182,12 +182,12 @@ define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, |v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -199,12 +199,12 @@ define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y)
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v0, |v1|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.y = call float @llvm.fabs.f32(float %y) #0
+ %fabs.y = call float @llvm.fabs.f32(float %y) strictfp
%val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}
-define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
+define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) strictfp {
; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -216,7 +216,7 @@ define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, -|v0|, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %fabs.x = call float @llvm.fabs.f32(float %x) #0
+ %fabs.x = call float @llvm.fabs.f32(float %x) strictfp
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
@@ -226,5 +226,3 @@ declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
-attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
index e7d136c377079e..6bbcb2fef9c68b 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
@@ -6,7 +6,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
-define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
+define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -22,7 +22,7 @@ define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) #0 {
+define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -38,7 +38,7 @@ define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) #0 {
ret double %val
}
-define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) #0 {
+define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) strictfp {
; GCN-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -54,7 +54,7 @@ define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) #0 {
ret double %val
}
-define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -72,7 +72,7 @@ define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -90,7 +90,7 @@ define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2
ret <2 x double> %val
}
-define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
+define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -108,7 +108,7 @@ define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <
ret <2 x double> %val
}
-define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
+define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) strictfp {
; GCN-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -128,7 +128,7 @@ define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3
ret <3 x double> %val
}
-define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
+define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) strictfp {
; GCN-LABEL: s_constained_fsub_f64_fpexcept_strict:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s4
@@ -145,9 +145,6 @@ define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg
ret <2 x float> %cast
}
-declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { inaccessiblememonly nounwind willreturn }
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) inaccessiblememonly nounwind willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
index 1f3f17c3e0c462..ee675b2e2b2a8e 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
@@ -10,12 +10,12 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
-; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
+; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) strictfp {
; %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret half %result
; }
-define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
+define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) strictfp {
; GFX8-SDAG-LABEL: test_ldexp_f16_i32:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -73,12 +73,12 @@ define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
ret half %result
}
-; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) #0 {
+; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) strictfp {
; %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret <2 x half> %result
; }
-define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) #0 {
+define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) strictfp {
; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -161,7 +161,7 @@ define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a,
ret <2 x half> %result
}
-define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) #0 {
+define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) strictfp {
; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -257,7 +257,7 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
ret <3 x half> %result
}
-define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) #0 {
+define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) strictfp {
; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -376,15 +376,12 @@ define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a,
ret <4 x half> %result
}
-declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1
-declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
index 456f0bd2434fb8..978894f057ccee 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
@@ -9,12 +9,12 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
-; define float @test_ldexp_f32_i16(ptr addrspace(1) %out, float %a, i16 %b) #0 {
+; define float @test_ldexp_f32_i16(ptr addrspace(1) %out, float %a, i16 %b) strictfp {
; %result = call float @llvm.experimental.constrained.ldexp.f32.i16(float %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret float %result
; }
-define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #0 {
+define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) strictfp {
; GFX6-LABEL: test_ldexp_f32_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -42,12 +42,12 @@ define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #0 {
ret float %result
}
-; define <2 x float> @test_ldexp_v2f32_v2i16(ptr addrspace(1) %out, <2 x float> %a, <2 x i16> %b) #0 {
+; define <2 x float> @test_ldexp_v2f32_v2i16(ptr addrspace(1) %out, <2 x float> %a, <2 x i16> %b) strictfp {
; %result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret <2 x float> %result
; }
-define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) #0 {
+define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) strictfp {
; GFX6-SDAG-LABEL: test_ldexp_v2f32_v2i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -100,7 +100,7 @@ define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a
ret <2 x float> %result
}
-define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a, <3 x i32> %b) #0 {
+define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a, <3 x i32> %b) strictfp {
; GFX6-SDAG-LABEL: test_ldexp_v3f32_v3i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -163,7 +163,7 @@ define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a
ret <3 x float> %result
}
-define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a, <4 x i32> %b) #0 {
+define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a, <4 x i32> %b) strictfp {
; GFX6-SDAG-LABEL: test_ldexp_v4f32_v4i32:
; GFX6-SDAG: ; %bb.0:
; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -236,15 +236,12 @@ define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a
ret <4 x float> %result
}
-declare float @llvm.experimental.constrained.ldexp.f32.i16(float, i16, metadata, metadata) #1
-declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>, metadata, metadata) #1
-declare <4 x float> @llvm.experimental.constrained.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+declare float @llvm.experimental.constrained.ldexp.f32.i16(float, i16, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x float> @llvm.experimental.constrained.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <4 x float> @llvm.experimental.constrained.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX11-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
index b31625f8f70736..e8a9902c319ff1 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
@@ -9,12 +9,12 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
-; define double @test_ldexp_f64_i16(ptr addrspace(1) %out, double %a, i16 %b) #0 {
+; define double @test_ldexp_f64_i16(ptr addrspace(1) %out, double %a, i16 %b) strictfp {
; %result = call double @llvm.experimental.constrained.ldexp.f64.i16(double %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret double %result
; }
-define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #0 {
+define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) strictfp {
; GCN-LABEL: test_ldexp_f64_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -24,12 +24,12 @@ define double @test_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #0 {
ret double %result
}
-; define <2 x double> @test_ldexp_v2f64_v2i16(ptr addrspace(1) %out, <2 x double> %a, <2 x i16> %b) #0 {
+; define <2 x double> @test_ldexp_v2f64_v2i16(ptr addrspace(1) %out, <2 x double> %a, <2 x i16> %b) strictfp {
; %result = call <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret <2 x double> %result
; }
-define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %a, <2 x i32> %b) #0 {
+define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double> %a, <2 x i32> %b) strictfp {
; GCN-LABEL: test_ldexp_v2f64_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -40,7 +40,7 @@ define <2 x double> @test_ldexp_v2f64_v2i32(ptr addrspace(1) %out, <2 x double>
ret <2 x double> %result
}
-define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double> %a, <3 x i32> %b) #0 {
+define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double> %a, <3 x i32> %b) strictfp {
; GCN-LABEL: test_ldexp_v3f64_v3i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -52,7 +52,7 @@ define <3 x double> @test_ldexp_v3f64_v3i32(ptr addrspace(1) %out, <3 x double>
ret <3 x double> %result
}
-define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %a, <4 x i32> %b) #0 {
+define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double> %a, <4 x i32> %b) strictfp {
; GCN-LABEL: test_ldexp_v4f64_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -65,15 +65,12 @@ define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double>
ret <4 x double> %result
}
-declare double @llvm.experimental.constrained.ldexp.f64.i16(double, i16, metadata, metadata) #1
-declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double>, <2 x i16>, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double>, <3 x i32>, metadata, metadata) #1
-declare <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>, metadata, metadata) #1
-
-attributes #0 = { strictfp }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+declare double @llvm.experimental.constrained.ldexp.f64.i16(double, i16, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double>, <2 x i16>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double>, <3 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>, metadata, metadata) nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
; GFX11-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
index 3bf7fec81c0413..3cd4e3bba5075a 100644
--- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
-declare void @f16_user(half) #0
-declare half @f16_result() #0
+declare void @f16_user(half) strictfp
+declare half @f16_result() strictfp
-declare void @v2f16_user(<2 x half>) #0
-declare <2 x half> @v2f16_result() #0
+declare void @v2f16_user(<2 x half>) strictfp
+declare <2 x half> @v2f16_result() strictfp
-declare void @v4f16_user(<4 x half>) #0
-declare <4 x half> @v4f16_result() #0
+declare void @v4f16_user(<4 x half>) strictfp
+declare <4 x half> @v4f16_result() strictfp
-declare void @v8f16_user(<8 x half>) #0
-declare <8 x half> @v8f16_result() #0
+declare void @v8f16_user(<8 x half>) strictfp
+declare <8 x half> @v8f16_result() strictfp
-define void @f16_arg(half %arg, ptr %ptr) #0 {
+define void @f16_arg(half %arg, ptr %ptr) strictfp {
; GFX7-LABEL: f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -28,7 +28,7 @@ define void @f16_arg(half %arg, ptr %ptr) #0 {
ret void
}
-define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
+define void @v2f16_arg(<2 x half> %arg, ptr %ptr) strictfp {
; GFX7-LABEL: v2f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -49,7 +49,7 @@ define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
ret void
}
-define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
+define void @v3f16_arg(<3 x half> %arg, ptr %ptr) strictfp {
; GFX7-LABEL: v3f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -76,7 +76,7 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
ret void
}
-define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
+define void @v4f16_arg(<4 x half> %arg, ptr %ptr) strictfp {
; GFX7-LABEL: v4f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -109,7 +109,7 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
ret void
}
-define half @f16_return(float %arg) #0 {
+define half @f16_return(float %arg) strictfp {
; GFX7-LABEL: f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -121,7 +121,7 @@ define half @f16_return(float %arg) #0 {
ret half %fptrunc
}
-define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+define <2 x half> @v2f16_return(<2 x float> %arg) strictfp {
; GFX7-LABEL: v2f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -136,7 +136,7 @@ define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
ret <2 x half> %fptrunc
}
-define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+define <3 x half> @v3f16_return(<3 x float> %arg) strictfp {
; GFX7-LABEL: v3f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -154,7 +154,7 @@ define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
ret <3 x half> %fptrunc
}
-define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+define <4 x half> @v4f16_return(<4 x float> %arg) strictfp {
; GFX7-LABEL: v4f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -175,7 +175,7 @@ define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
ret <4 x half> %fptrunc
}
-define void @outgoing_f16_arg(ptr %ptr) #0 {
+define void @outgoing_f16_arg(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -209,7 +209,7 @@ define void @outgoing_f16_arg(ptr %ptr) #0 {
ret void
}
-define void @outgoing_v2f16_arg(ptr %ptr) #0 {
+define void @outgoing_v2f16_arg(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_v2f16_arg:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -245,7 +245,7 @@ define void @outgoing_v2f16_arg(ptr %ptr) #0 {
ret void
}
-define void @outgoing_f16_return(ptr %ptr) #0 {
+define void @outgoing_f16_return(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -287,7 +287,7 @@ define void @outgoing_f16_return(ptr %ptr) #0 {
ret void
}
-define void @outgoing_v2f16_return(ptr %ptr) #0 {
+define void @outgoing_v2f16_return(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_v2f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -335,7 +335,7 @@ define void @outgoing_v2f16_return(ptr %ptr) #0 {
ret void
}
-define void @outgoing_v4f16_return(ptr %ptr) #0 {
+define void @outgoing_v4f16_return(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_v4f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -396,7 +396,7 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
ret void
}
-define void @outgoing_v8f16_return(ptr %ptr) #0 {
+define void @outgoing_v8f16_return(ptr %ptr) strictfp {
; GFX7-LABEL: outgoing_v8f16_return:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -483,7 +483,7 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 {
ret void
}
-define half @call_split_type_used_outside_block_v8f16() #0 {
+define half @call_split_type_used_outside_block_v8f16() strictfp {
; GFX7-LABEL: call_split_type_used_outside_block_v8f16:
; GFX7: ; %bb.0: ; %bb0
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -545,14 +545,12 @@ bb1:
ret half %extract
}
-declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
-declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
-declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
-declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) strictfp
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) strictfp
+declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) strictfp
+declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) strictfp
-declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
-declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
-
-attributes #0 = { strictfp }
+declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) strictfp
+declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) strictfp
+declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) strictfp
+declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) strictfp
diff --git a/llvm/test/CodeGen/AMDGPU/sub.i16.ll b/llvm/test/CodeGen/AMDGPU/sub.i16.ll
index 2b1577e8320512..7b615d45813e93 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.i16.ll
@@ -7,7 +7,7 @@
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -24,7 +24,7 @@ define amdgpu_kernel void @v_test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0xff85, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_sub_i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -39,7 +39,7 @@ define amdgpu_kernel void @v_test_sub_i16_constant(ptr addrspace(1) %out, ptr ad
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 0x34d, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_sub_i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -54,7 +54,7 @@ define amdgpu_kernel void @v_test_sub_i16_neg_constant(ptr addrspace(1) %out, pt
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: v_subrev_u16_e32 [[ADD:v[0-9]+]], 63, [[A]]
; VI-NEXT: buffer_store_short [[ADD]]
-define amdgpu_kernel void @v_test_sub_i16_inline_63(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_inline_63(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_test_sub_i16_inline_63(ptr addrspace(1) %out, ptr a
; VI: flat_load_ushort [[B:v[0-9]+]]
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI-NEXT: buffer_store_dword [[ADD]]
-define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -90,7 +90,7 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(ptr addrspace(1) %out, ptr
; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
; VI: buffer_store_dwordx2 v[[[ADD]]:[[VZERO]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -110,7 +110,7 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(ptr addrspace(1) %out, ptr
; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; VI: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
; VI-NEXT: buffer_store_dword [[SEXT]]
-define amdgpu_kernel void @v_test_sub_i16_sext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_sext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -131,7 +131,7 @@ define amdgpu_kernel void @v_test_sub_i16_sext_to_i32(ptr addrspace(1) %out, ptr
; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
; VI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; VI-NEXT: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @v_test_sub_i16_sext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_i16_sext_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.out = getelementptr inbounds i64, ptr addrspace(1) %out, i32 %tid
%gep.in0 = getelementptr inbounds i16, ptr addrspace(1) %in0, i32 %tid
@@ -149,7 +149,7 @@ define amdgpu_kernel void @v_test_sub_i16_sext_to_i64(ptr addrspace(1) %out, ptr
; GCN-LABEL: {{^}}v_test_sub_i16_constant_commute:
; VI: v_subrev_u16_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
; CI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 0x800, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_sub_i16_constant_commute(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_i16_constant_commute(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
%size = call i32 @llvm.amdgcn.groupstaticsize()
%size.trunc = trunc i32 %size to i16
call void asm sideeffect "; $0", "v"(ptr addrspace(3) @lds)
@@ -162,8 +162,5 @@ define amdgpu_kernel void @v_test_sub_i16_constant_commute(ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.groupstaticsize() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @llvm.amdgcn.groupstaticsize() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 6ec213a06999b6..02b3c8bfe9c70a 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_test_sub_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -93,7 +93,7 @@ define amdgpu_kernel void @v_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @s_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0, ptr addrspace(4) %in1) #1 {
+define amdgpu_kernel void @s_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0, ptr addrspace(4) %in1) nounwind {
; GFX9-LABEL: s_test_sub_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
@@ -172,7 +172,7 @@ define amdgpu_kernel void @s_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @s_test_sub_self_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0) #1 {
+define amdgpu_kernel void @s_test_sub_self_v2i16(ptr addrspace(1) %out, ptr addrspace(4) %in0) nounwind {
; GCN-LABEL: s_test_sub_self_v2i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -211,7 +211,7 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(ptr addrspace(1) %out, ptr addr
}
; FIXME: VI should not scalarize arg access.
-define amdgpu_kernel void @s_test_sub_v2i16_kernarg(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) #1 {
+define amdgpu_kernel void @s_test_sub_v2i16_kernarg(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind {
; GFX9-LABEL: s_test_sub_v2i16_kernarg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -274,7 +274,7 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(ptr addrspace(1) %out, <2 x
ret void
}
-define amdgpu_kernel void @v_test_sub_v2i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_constant:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -346,7 +346,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(ptr addrspace(1) %out, ptr
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_neg_constant:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -417,7 +417,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_inline_neg1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -487,7 +487,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
ret void
}
-define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_inline_lo_zero_hi:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -557,7 +557,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(ptr addrspace(1) %
}
; The high element gives fp
-define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %out, ptr addrspace(1) %in0) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %out, ptr addrspace(1) %in0) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_inline_fp_split:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -627,7 +627,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -723,7 +723,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -826,7 +826,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -924,7 +924,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(ptr addrspace(1) %out,
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
+define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -1031,7 +1031,4 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(ptr addrspace(1) %out,
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
index 66c34414b69210..d5b0dba17b7ff5 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL:{{^}}row_filter_C1_D0:
-define amdgpu_kernel void @row_filter_C1_D0() #0 {
+define amdgpu_kernel void @row_filter_C1_D0() nounwind {
entry:
br i1 undef, label %for.inc.1, label %do.body.preheader
@@ -42,7 +42,7 @@ for.inc.1: ; preds = %do.body.1562.prehea
; GCN-LABEL: {{^}}foo:
; GCN: s_endpgm
-define amdgpu_ps void @foo() #0 {
+define amdgpu_ps void @foo() nounwind {
bb:
br i1 undef, label %bb2, label %bb1
@@ -78,7 +78,7 @@ bb14: ; preds = %bb27, %bb24, %bb9
%tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
%tmp17 = fmul float 1.050000e+01, %tmp16
%tmp18 = fmul float 1.150000e+01, %tmp15
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp18, float %tmp17, float %tmp17, float %tmp17, i1 true, i1 true) nounwind
ret void
bb23: ; preds = %bb13
@@ -96,8 +96,5 @@ bb27: ; preds = %bb24
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 873567c3ab6f4c..7f33e8623d7df5 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -41,7 +41,7 @@ define amdgpu_kernel void @foobar(float %a0, float %a1, ptr addrspace(1) %out) n
; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
entry:
%v0 = insertelement <4 x float> undef, float %a0, i32 0
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%cnd = icmp eq i32 %tid, 0
br i1 %cnd, label %ift, label %ife
@@ -56,6 +56,4 @@ ife:
ret void
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 9189cef019cf40..53114c96beaba1 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -79,8 +79,6 @@ for.cond.cleanup: ; preds = %for.cond4.preheader
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
-attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare i32 @llvm.amdgcn.workitem.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.amdgcn.workgroup.id.x() nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nocallback nofree nosync nounwind speculatable willreturn memory(none)
diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
index a5e14234c3d415..f0b5388df7a8fd 100644
--- a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
-define void @test(i1 %c0) #1 {
+define void @test(i1 %c0) nounwind {
; Clean up the unreachable blocks introduced with LowerSwitch pass.
; This test ensures that, in the pass flow, UnreachableBlockElim pass
; follows the LowerSwitch. Otherwise, this testcase will crash
@@ -21,7 +21,7 @@ define void @test(i1 %c0) #1 {
; GCN: bb.{{[0-9]+}}.Flow:
; GCN: bb.{{[0-9]+}}.UnifiedReturnBlock:
entry:
- %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %idx = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
br i1 %c0, label %entry.true.blk, label %entry.false.blk
entry.true.blk: ; preds = %entry
@@ -43,7 +43,7 @@ define void @test(i1 %c0) #1 {
br i1 %pre.exit, label %unreach.blk, label %pre.false.blk
pre.false.blk: ; preds = %preheader.blk
- %call.pre.false = tail call i32 @func(i32 %idx) #0
+ %call.pre.false = tail call i32 @func(i32 %idx) nounwind readnone
br label %unreach.blk
unreach.blk: ; preds = %preheader.blk, %pre.false.blk
@@ -55,8 +55,5 @@ define void @test(i1 %c0) #1 {
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @func(i32)#0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare i32 @func(i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
index 1adf549c76767d..73cfca2a5456ab 100644
--- a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
@@ -121,8 +121,6 @@ main_body:
}
; Function Attrs: nounwind readonly
-declare float @llvm.cos.f32(float) #1
+declare float @llvm.cos.f32(float) nounwind readonly
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
-attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll b/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll
index 8292fdc7861354..38e290089c6f78 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-call-cgp.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -codegenprepare %s | FileCheck %s
-define internal fastcc void @callee(ptr nocapture %p, i32 %a) #0 {
+define internal fastcc void @callee(ptr nocapture %p, i32 %a) nounwind {
store volatile i32 %a, ptr %p, align 4
ret void
}
@@ -9,7 +9,7 @@ define internal fastcc void @callee(ptr nocapture %p, i32 %a) #0 {
; CHECK: tail call fastcc void @callee(
; CHECK-NEXT: ret void
; CHECK: ret void
-define void @func_caller(ptr nocapture %p, i32 %a, i32 %b) #0 {
+define void @func_caller(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %bb, label %ret
@@ -27,7 +27,7 @@ ret:
; CHECK-NEXT: br label %ret
; CHECK: ret void
-define amdgpu_kernel void @kernel_caller(ptr nocapture %p, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @kernel_caller(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
%cmp = icmp eq i32 %b, 0
br i1 %cmp, label %bb, label %ret
@@ -39,5 +39,3 @@ bb:
ret:
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
index 17ec27bc49db43..3d7ada9ceb6a2c 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
@@ -3,19 +3,19 @@
; Need to to trigger tail duplication this during
; MachineBlockPlacement, since calls aren't tail duplicated pre-RA.
-declare void @nonconvergent_func() #0
-declare void @convergent_func() #1
-declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
-declare void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset) #2
+declare void @nonconvergent_func() nounwind
+declare void @convergent_func() nounwind convergent
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
+declare void @llvm.amdgcn.ds.gws.init(i32, i32) convergent inaccessiblememonly nounwind
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) convergent inaccessiblememonly nounwind
+declare void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset) convergent inaccessiblememonly nounwind
; barrier shouldn't be duplicated.
; GCN-LABEL: {{^}}taildup_barrier:
; GCN: s_barrier
; GCN-NOT: s_barrier
-define void @taildup_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #0 {
+define void @taildup_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) nounwind {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -38,7 +38,7 @@ ret:
; GCN-LABEL: {{^}}taildup_convergent_call:
; GCN: s_swappc_b64
; GCN-NOT: s_swappc_b64
-define void @taildup_convergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #1 {
+define void @taildup_convergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) nounwind convergent {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -63,7 +63,7 @@ ret:
; GCN-LABEL: {{^}}taildup_nonconvergent_call:
; GCN: s_swappc_b64
; GCN-NOT: s_swappc_b64
-define void @taildup_nonconvergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #1 {
+define void @taildup_nonconvergent_call(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) nounwind convergent {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -86,7 +86,7 @@ ret:
; GCN-LABEL: {{^}}taildup_convergent_tailcall:
; GCN: s_setpc_b64
; GCN-NOT: s_setpc_b64
-define void @taildup_convergent_tailcall(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) #1 {
+define void @taildup_convergent_tailcall(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond) nounwind convergent {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -106,7 +106,7 @@ call:
; GCN-LABEL: {{^}}taildup_gws_init:
; GCN: ds_gws_init
; GCN-NOT: ds_gws_init
-define amdgpu_kernel void @taildup_gws_init(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @taildup_gws_init(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) nounwind {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -129,7 +129,7 @@ ret:
; GCN-LABEL: {{^}}taildup_gws_barrier:
; GCN: ds_gws_barrier
; GCN-NOT: ds_gws_barrier
-define amdgpu_kernel void @taildup_gws_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) #0 {
+define amdgpu_kernel void @taildup_gws_barrier(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %val, i32 %offset) nounwind {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -152,7 +152,7 @@ ret:
; GCN-LABEL: {{^}}taildup_gws_sema_release_all:
; GCN: ds_gws_sema_release_all
; GCN-NOT: ds_gws
-define amdgpu_kernel void @taildup_gws_sema_release_all(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %offset) #0 {
+define amdgpu_kernel void @taildup_gws_sema_release_all(ptr addrspace(1) %a, ptr addrspace(1) %b, i1 %cond, i32 %offset) nounwind {
entry:
br i1 %cond, label %bb1, label %bb2
@@ -171,7 +171,3 @@ call:
ret:
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind convergent }
-attributes #2 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/target-cpu.ll b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
index 3119c32dcbaef4..fbe529957a7f2a 100644
--- a/llvm/test/CodeGen/AMDGPU/target-cpu.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
@@ -1,20 +1,20 @@
; RUN: llc -mtriple=amdgcn -disable-promote-alloca-to-vector -verify-machineinstrs < %s | FileCheck %s
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #1
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; CI+ intrinsic
-declare void @llvm.amdgcn.s.dcache.inv.vol() #0
+declare void @llvm.amdgcn.s.dcache.inv.vol() nounwind
; VI+ intrinsic
-declare void @llvm.amdgcn.s.dcache.wb() #0
+declare void @llvm.amdgcn.s.dcache.wb() nounwind
; CHECK-LABEL: {{^}}target_none:
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-define amdgpu_kernel void @target_none() #0 {
+define amdgpu_kernel void @target_none() nounwind {
%kernargs = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, ptr addrspace(4) %kernargs, i64 1024
%ptr = load ptr addrspace(1), ptr addrspace(4) %kernargs.gep
@@ -29,7 +29,7 @@ define amdgpu_kernel void @target_none() #0 {
; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSETREG]]
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-define amdgpu_kernel void @target_tahiti() #1 {
+define amdgpu_kernel void @target_tahiti() nounwind readnone {
%kernargs = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, ptr addrspace(4) %kernargs, i64 1024
%ptr = load ptr addrspace(1), ptr addrspace(4) %kernargs.gep
@@ -44,7 +44,7 @@ define amdgpu_kernel void @target_tahiti() #1 {
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x100
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
; CHECK: s_dcache_inv_vol
-define amdgpu_kernel void @target_bonaire() #3 {
+define amdgpu_kernel void @target_bonaire() nounwind "target-cpu"="bonaire" {
%kernargs = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, ptr addrspace(4) %kernargs, i64 1024
%ptr = load ptr addrspace(1), ptr addrspace(4) %kernargs.gep
@@ -60,7 +60,7 @@ define amdgpu_kernel void @target_bonaire() #3 {
; CHECK: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x400
; CHECK: flat_store_dword
; CHECK: s_dcache_wb{{$}}
-define amdgpu_kernel void @target_fiji() #4 {
+define amdgpu_kernel void @target_fiji() nounwind "target-cpu"="fiji" {
%kernargs = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
%kernargs.gep = getelementptr inbounds i8, ptr addrspace(4) %kernargs, i64 1024
%ptr = load ptr addrspace(1), ptr addrspace(4) %kernargs.gep
@@ -74,7 +74,7 @@ define amdgpu_kernel void @target_fiji() #4 {
; CHECK-LABEL: {{^}}promote_alloca_enabled:
; CHECK: ds_read_b32
-define amdgpu_kernel void @promote_alloca_enabled(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #5 {
+define amdgpu_kernel void @promote_alloca_enabled(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "target-features"="+promote-alloca" "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, ptr addrspace(1) %in, align 4
@@ -88,7 +88,7 @@ entry:
; CHECK: SCRATCH_RSRC_DWORD0
; CHECK: SCRATCH_RSRC_DWORD1
; CHECK: ScratchSize: 24
-define amdgpu_kernel void @promote_alloca_disabled(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #6 {
+define amdgpu_kernel void @promote_alloca_disabled(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) nounwind "target-features"="-promote-alloca" "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="1,256" {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
%tmp = load i32, ptr addrspace(1) %in, align 4
@@ -97,11 +97,3 @@ entry:
store i32 %load, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "target-cpu"="tahiti" }
-attributes #3 = { nounwind "target-cpu"="bonaire" }
-attributes #4 = { nounwind "target-cpu"="fiji" }
-attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="1,256" }
-attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
index 878d59a83cdd68..bfb1599d5ad9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
@@ -12,9 +12,7 @@ define amdgpu_kernel void @ds_append_noalias() {
ret void
}
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #0
-
-attributes #0 = { argmemonly convergent nounwind willreturn }
+declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) argmemonly convergent nounwind willreturn
!0 = !{!1}
!1 = distinct !{!1, !2}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
index 08dd90250d0b4b..8215403ed0c07b 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
@@ -26,21 +26,19 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="-xnack" {
entry:
ret void
}
-define void @func1() #0 {
+define void @func1() "target-features"="-xnack" {
entry:
ret void
}
-define void @func2() #0 {
+define void @func2() "target-features"="-xnack" {
entry:
ret void
}
-
-attributes #0 = { "target-features"="-xnack" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
index a8340ddadaaf7a..20d6c38a0ff5ec 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
@@ -26,22 +26,20 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="+xnack" {
entry:
ret void
}
-define void @func1() #0 {
+define void @func1() "target-features"="+xnack" {
entry:
ret void
}
-define void @func2() #0 {
+define void @func2() "target-features"="+xnack" {
entry:
ret void
}
-attributes #0 = { "target-features"="+xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
index aefcfac23ff5de..7781fef00d427d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
@@ -31,7 +31,7 @@ entry:
ret void
}
-define void @func1() #0 {
+define void @func1() "target-features"="-xnack" {
entry:
ret void
}
@@ -41,7 +41,5 @@ entry:
ret void
}
-attributes #0 = { "target-features"="-xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
index 6005c31622405c..eb3a3bb48e739d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
@@ -26,7 +26,7 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="-xnack" {
entry:
ret void
}
@@ -41,7 +41,5 @@ entry:
ret void
}
-attributes #0 = { "target-features"="-xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
index 328f56fb841b8f..9d30ec9d523782 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
@@ -31,7 +31,7 @@ entry:
ret void
}
-define void @func1() #0 {
+define void @func1() "target-features"="+xnack" {
entry:
ret void
}
@@ -41,7 +41,5 @@ entry:
ret void
}
-attributes #0 = { "target-features"="+xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
index c50dd8b2fec7aa..4bbf81699708cc 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
@@ -26,7 +26,7 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="+xnack" {
entry:
ret void
}
@@ -41,7 +41,5 @@ entry:
ret void
}
-attributes #0 = { "target-features"="+xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll
index 0f54d783484dd4..38b4010c04c208 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-invalid-any-off-on.ll
@@ -7,18 +7,15 @@ entry:
ret void
}
-define void @func1() #0 {
+define void @func1() "target-features"="-xnack" {
entry:
ret void
}
-define void @func2() #1 {
+define void @func2() "target-features"="+xnack" {
entry:
ret void
}
-attributes #0 = { "target-features"="-xnack" }
-attributes #1 = { "target-features"="+xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
index 60ff8b2dbb5ebc..7b1bb8e3cac2e1 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
@@ -26,12 +26,10 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="-xnack" {
entry:
ret void
}
-attributes #0 = { "target-features"="-xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
index e04629a24209eb..02093f60b16970 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
@@ -26,12 +26,10 @@
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
; ELF-NEXT: ]
-define void @func0() #0 {
+define void @func0() "target-features"="+xnack" {
entry:
ret void
}
-attributes #0 = { "target-features"="+xnack" }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
index eef5f57beb07d7..e0d4c1ec96ed84 100644
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
; insert an unconditional jump there.
-define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
+define amdgpu_ps float @simple_test_return_to_epilog(float %a) nounwind {
; GCN-LABEL: name: simple_test_return_to_epilog
; GCN: bb.0.entry:
; GCN-NEXT: liveins: $vgpr0
@@ -12,7 +12,7 @@ entry:
ret float %a
}
-define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
+define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) nounwind {
; GCN-LABEL: name: test_return_to_epilog_into_end_block
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
@@ -45,7 +45,7 @@ else: ; preds = %entry
unreachable
}
-define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
+define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) nounwind {
; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
@@ -97,7 +97,7 @@ else: ; preds = %else.if.cond
unreachable
}
-define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 {
+define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) nounwind {
; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
; GCN: bb.0 (%ir-block.0):
; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -189,6 +189,4 @@ end: ; preds = %kill0, %kill1, %flo
ret { <4 x float> } undef
}
-declare void @llvm.amdgcn.kill(i1) #0
-
-attributes #0 = { nounwind }
+declare void @llvm.amdgcn.kill(i1) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 3cd6c98ef4b8e0..1c6a6158cf0ba1 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -4,8 +4,8 @@
; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s
; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
+declare void @llvm.trap() nounwind noreturn
+declare void @llvm.debugtrap() nounwind
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
; NOHSA-TRAP-GFX900-LABEL: trap:
@@ -203,8 +203,5 @@ define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0)
ret void
}
-attributes #0 = { nounwind noreturn }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index 2f687295af73e0..15554865cd3a12 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -26,8 +26,8 @@
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
+declare void @llvm.trap() nounwind noreturn
+declare void @llvm.debugtrap() nounwind
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
@@ -142,8 +142,5 @@ ret:
ret void
}
-attributes #0 = { nounwind noreturn }
-attributes #1 = { nounwind }
-
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index 416dbb226422cc..3270fe687b8063 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -3,7 +3,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
-define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
; SI-LABEL: s_uaddo_i64_zext:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -72,7 +72,7 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; FIXME: Could do scalar
-define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_uaddo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -129,7 +129,7 @@ define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_uaddo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -207,7 +207,7 @@ define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_uaddo_i32_novcc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -293,12 +293,12 @@ define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
%val = extractvalue { i32, i1 } %uadd, 0
%carry = extractvalue { i32, i1 } %uadd, 1
store volatile i32 %val, ptr addrspace(1) %out, align 4
- call void asm sideeffect "", "~{vcc}"() #0
+ call void asm sideeffect "", "~{vcc}"() nounwind
store volatile i1 %carry, ptr addrspace(1) %carryout
ret void
}
-define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind {
; SI-LABEL: s_uaddo_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -367,7 +367,7 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_uaddo_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -451,7 +451,7 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_uaddo_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -615,7 +615,7 @@ define amdgpu_kernel void @v_uaddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @s_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_uaddo_clamp_bit:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
@@ -703,7 +703,7 @@ exit:
ret void
}
-define amdgpu_kernel void @v_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_uaddo_clamp_bit:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
@@ -876,13 +876,9 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
-declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
-declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) nounwind readnone
declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index b714fda6f1d076..e701b69b0f987c 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -614,17 +614,15 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
ret i64 %result
}
-declare i8 @llvm.uadd.sat.i8(i8, i8) #0
-declare i16 @llvm.uadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.uadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.uadd.sat.i64(i64, i64) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i8 @llvm.uadd.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare i16 @llvm.uadd.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare i32 @llvm.uadd.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
+declare i64 @llvm.uadd.sat.i64(i64, i64) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
index dc588434076e63..23772d79cb694c 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
@@ -31,7 +31,7 @@ define amdgpu_kernel void @udiv24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
-define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math-f32"="preserve-sign,preserve-sign" {
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -50,7 +50,7 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out,
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
-define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math-f32"="ieee,preserve-sign" {
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -69,7 +69,7 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr
; EG-DAG: UINT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
-define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
+define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr addrspace(1) %in) "denormal-fp-math-f32"="preserve-sign,ieee" {
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -380,7 +380,3 @@ define amdgpu_kernel void @test_udiv24_u23_u16_i32(ptr addrspace(1) %out, ptr ad
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
-attributes #2 = { "denormal-fp-math-f32"="preserve-sign,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
index 79b0a966bc1fbd..1ef9005d9a142f 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -5,7 +5,7 @@
; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %in) nounwind {
; GFX6-LABEL: s_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -72,7 +72,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, i64 %i
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_uint_to_fp_i64_to_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -158,7 +158,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) nounwind {
; GFX6-LABEL: s_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -221,7 +221,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %i
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_uint_to_fp_i64_to_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -303,7 +303,7 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2 x i64> %in) nounwind {
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -389,7 +389,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f32(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -553,7 +553,7 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) #0{
+define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2 x i64> %in) nounwind {
; GFX6-LABEL: s_uint_to_fp_v2i64_to_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -651,7 +651,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f16(ptr addrspace(1) %out, <2
ret void
}
-define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; GFX6-LABEL: v_uint_to_fp_v4i64_to_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -838,7 +838,4 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, pt
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
index b3d589494b62fe..e09f0ce52ece5c 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
@@ -6,7 +6,7 @@
; SI: v_cvt_f32_u32_e32
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %in) nounwind {
%result = uitofp i32 %in to float
store float %result, ptr addrspace(1) %out
ret void
@@ -16,7 +16,7 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, i32 %i
; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
; R600: INT_TO_FLT
-define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -32,7 +32,7 @@ define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(ptr addrspace(1) %out, ptr ad
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2 x i32> %in) nounwind {
%result = uitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, ptr addrspace(1) %out
ret void
@@ -49,7 +49,7 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f32(ptr addrspace(1) %out, <2
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%value = load <4 x i32>, ptr addrspace(1) %in
%result = uitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, ptr addrspace(1) %out
@@ -66,7 +66,7 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f32(ptr addrspace(1) %out, pt
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
@@ -82,7 +82,7 @@ define amdgpu_kernel void @v_uint_to_fp_v4i32(ptr addrspace(1) %out, ptr addrspa
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in) nounwind {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
store float %fp, ptr addrspace(1) %out
@@ -93,7 +93,7 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32(ptr addrspace(1) %out, i32 %in
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1 %in) nounwind {
%fp = uitofp i1 %in to float
store float %fp, ptr addrspace(1) %out
ret void
@@ -106,7 +106,7 @@ define amdgpu_kernel void @s_uint_to_fp_i1_to_f32_load(ptr addrspace(1) %out, i1
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr i1, ptr addrspace(1) %in, i32 %tid
%out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
@@ -122,14 +122,11 @@ define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr a
; R600: CNDE_INT
; R600: UINT_TO_FLT
-define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(ptr addrspace(1) %out, i64 %in) nounwind {
entry:
%cvt = uitofp i64 %in to float
store float %cvt, ptr addrspace(1) %out
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll
index a2d99f1f8c2c26..ebade04ab15940 100644
--- a/llvm/test/CodeGen/AMDGPU/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/umed3.ll
@@ -2,11 +2,11 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i32:
; GCN: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define amdgpu_kernel void @v_test_umed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_umed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -25,7 +25,7 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_i32(ptr addrspace(1) %out, ptr add
; GCN-LABEL: {{^}}v_test_umed3_multi_use_r_i_i_i32:
; GCN: v_max_u32
; GCN: v_min_u32
-define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -45,7 +45,7 @@ define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou
; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -64,7 +64,7 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1)
; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i64:
; GCN: v_cmp_lt_u64
; GCN: v_cmp_gt_u64
-define amdgpu_kernel void @v_test_umed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_umed3_r_i_i_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
@@ -85,7 +85,7 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_i64(ptr addrspace(1) %out, ptr add
; VI: v_max_u16_e32 [[MAX:v[0-9]]], 12, {{v[0-9]}}
; VI: v_min_u16_e32 {{v[0-9]}}, 17, [[MAX]]
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
-define amdgpu_kernel void @v_test_umed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
+define amdgpu_kernel void @v_test_umed3_r_i_i_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
%outgep = getelementptr i16, ptr addrspace(1) %out, i32 %tid
@@ -101,37 +101,37 @@ define amdgpu_kernel void @v_test_umed3_r_i_i_i16(ptr addrspace(1) %out, ptr add
ret void
}
-define internal i32 @umin(i32 %x, i32 %y) #2 {
+define internal i32 @umin(i32 %x, i32 %y) nounwind readnone alwaysinline {
%cmp = icmp ult i32 %x, %y
%sel = select i1 %cmp, i32 %x, i32 %y
ret i32 %sel
}
-define internal i32 @umax(i32 %x, i32 %y) #2 {
+define internal i32 @umax(i32 %x, i32 %y) nounwind readnone alwaysinline {
%cmp = icmp ugt i32 %x, %y
%sel = select i1 %cmp, i32 %x, i32 %y
ret i32 %sel
}
-define internal i16 @umin16(i16 %x, i16 %y) #2 {
+define internal i16 @umin16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%cmp = icmp ult i16 %x, %y
%sel = select i1 %cmp, i16 %x, i16 %y
ret i16 %sel
}
-define internal i16 @umax16(i16 %x, i16 %y) #2 {
+define internal i16 @umax16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%cmp = icmp ugt i16 %x, %y
%sel = select i1 %cmp, i16 %x, i16 %y
ret i16 %sel
}
-define internal i8 @umin8(i8 %x, i8 %y) #2 {
+define internal i8 @umin8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%cmp = icmp ult i8 %x, %y
%sel = select i1 %cmp, i8 %x, i8 %y
ret i8 %sel
}
-define internal i8 @umax8(i8 %x, i8 %y) #2 {
+define internal i8 @umax8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%cmp = icmp ugt i8 %x, %y
%sel = select i1 %cmp, i8 %x, i8 %y
ret i8 %sel
@@ -156,7 +156,7 @@ define internal i8 @umax8(i8 %x, i8 %y) #2 {
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -168,7 +168,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_1:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -180,7 +180,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_2:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -192,7 +192,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_3:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_3(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -204,7 +204,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_4:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_4(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -216,7 +216,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_5:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_5(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -228,7 +228,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_6:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_6(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -240,7 +240,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_7:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_7(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -252,7 +252,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_8:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_8(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -264,7 +264,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_9:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_9(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -276,7 +276,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_10:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_10(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -288,7 +288,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_11:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_11(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -300,7 +300,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_12:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_12(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -312,7 +312,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_13:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_13(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -324,7 +324,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_14:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_14(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -336,7 +336,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_15:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_15(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -362,7 +362,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_16:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_16(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -374,7 +374,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_17:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_17(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -386,7 +386,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_18:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_18(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -398,7 +398,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_19:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_19(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -410,7 +410,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_20:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_20(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -422,7 +422,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_21:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_21(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -434,7 +434,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_22:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_22(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -446,7 +446,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_23:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_23(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -458,7 +458,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_24:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_24(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -470,7 +470,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_25:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_25(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -482,7 +482,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_26:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_26(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -494,7 +494,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_27:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_27(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -506,7 +506,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_28:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_28(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -518,7 +518,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_29:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_29(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -530,7 +530,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_30:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_30(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -542,7 +542,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_31:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_31(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %y, i32 %x)
%tmp1 = call i32 @umax(i32 %y, i32 %x)
@@ -557,7 +557,7 @@ bb:
; GCN: s_and_b32
; GCN: s_and_b32
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i16_pat_0(ptr addrspace(1) %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) nounwind {
bb:
%tmp0 = call i16 @umin16(i16 %x, i16 %y)
%tmp1 = call i16 @umax16(i16 %x, i16 %y)
@@ -572,7 +572,7 @@ bb:
; GCN: s_and_b32
; GCN: s_and_b32
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i8_pat_0(ptr addrspace(1) %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) nounwind {
bb:
%tmp0 = call i8 @umin8(i8 %x, i8 %y)
%tmp1 = call i8 @umax8(i8 %x, i8 %y)
@@ -586,7 +586,7 @@ bb:
; GCN: s_min_u32
; GCN-NOT: {{s_min_u32|s_max_u32}}
; GCN: v_med3_u32
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -601,7 +601,7 @@ bb:
; GCN: s_max_u32
; GCN-NOT: {{s_min_u32|s_max_u32}}
; GCN: v_med3_u32
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -617,7 +617,7 @@ bb:
; GCN: s_min_u32
; GCN-NOT: {{s_min_u32|s_max_u32}}
; GCN: v_med3_u32
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -631,7 +631,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_result:
; GCN-NOT: {{s_min_u32|s_max_u32}}
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_result(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -646,7 +646,7 @@ bb:
; GCN-NOT: {{s_min_u32|s_max_u32}}
; GCN: v_med3_u32 v{{[0-9]+}}, [[B0:s[0-9]+]], [[B1:v[0-9]+]], v{{[0-9]+}}
; GCN: v_med3_u32 v{{[0-9]+}}, [[B0]], [[B1]], v{{[0-9]+}}
-define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) #1 {
+define amdgpu_kernel void @s_test_smed3_reuse_bounds(ptr addrspace(1) %arg, i32 %b0, i32 %b1, i32 %x, i32 %y) nounwind {
bb:
%lo = call i32 @umin(i32 %b0, i32 %b1)
%hi = call i32 @umax(i32 %b0, i32 %b1)
@@ -664,7 +664,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src0:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src0(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 1, i32 %y)
%tmp1 = call i32 @umax(i32 1, i32 %y)
@@ -676,7 +676,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src1:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src1(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 2)
%tmp1 = call i32 @umax(i32 %x, i32 2)
@@ -688,7 +688,7 @@ bb:
; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src2:
; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 9
-define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) #1 {
+define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src2(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) nounwind {
bb:
%tmp0 = call i32 @umin(i32 %x, i32 %y)
%tmp1 = call i32 @umax(i32 %x, i32 %y)
@@ -708,7 +708,7 @@ bb:
; VI: v_max_u16
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_umed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
+define amdgpu_kernel void @v_test_umed3_i16_pat_0(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %tid
@@ -729,7 +729,7 @@ bb:
; GCN-LABEL: {{^}}v_test_umed3_i16_pat_1:
; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_test_umed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #1 {
+define amdgpu_kernel void @v_test_umed3_i16_pat_1(ptr addrspace(1) %arg, ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr inbounds i16, ptr addrspace(1) %a.ptr, i32 %tid
@@ -747,7 +747,3 @@ bb:
store i16 %tmp3, ptr addrspace(1) %out.gep
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone alwaysinline }
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index 4e734d6e0884bc..c541521e7b37b2 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -9,7 +9,7 @@
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
-define amdgpu_kernel void @local_unaligned_load_store_i16(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i16(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load i16, ptr addrspace(3) %p, align 1
store i16 %v, ptr addrspace(3) %r, align 1
ret void
@@ -24,7 +24,7 @@ define amdgpu_kernel void @local_unaligned_load_store_i16(ptr addrspace(3) %p, p
; UNALIGNED: buffer_load_ushort
; UNALIGNED: buffer_store_short
; SI: s_endpgm
-define amdgpu_kernel void @global_unaligned_load_store_i16(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_i16(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load i16, ptr addrspace(1) %p, align 1
store i16 %v, ptr addrspace(1) %r, align 1
ret void
@@ -43,7 +43,7 @@ define amdgpu_kernel void @global_unaligned_load_store_i16(ptr addrspace(1) %p,
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
-define amdgpu_kernel void @local_unaligned_load_store_i32(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i32(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load i32, ptr addrspace(3) %p, align 1
store i32 %v, ptr addrspace(3) %r, align 1
ret void
@@ -61,7 +61,7 @@ define amdgpu_kernel void @local_unaligned_load_store_i32(ptr addrspace(3) %p, p
; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
-define amdgpu_kernel void @global_unaligned_load_store_i32(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_i32(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load i32, ptr addrspace(1) %p, align 1
store i32 %v, ptr addrspace(1) %r, align 1
ret void
@@ -75,7 +75,7 @@ define amdgpu_kernel void @global_unaligned_load_store_i32(ptr addrspace(1) %p,
; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
-define amdgpu_kernel void @global_align2_load_store_i32(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_align2_load_store_i32(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load i32, ptr addrspace(1) %p, align 2
store i32 %v, ptr addrspace(1) %r, align 2
ret void
@@ -86,7 +86,7 @@ define amdgpu_kernel void @global_align2_load_store_i32(ptr addrspace(1) %p, ptr
; GCN: ds_read_u16
; GCN: ds_write_b16
; GCN: ds_write_b16
-define amdgpu_kernel void @local_align2_load_store_i32(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_align2_load_store_i32(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load i32, ptr addrspace(3) %p, align 2
store i32 %v, ptr addrspace(3) %r, align 2
ret void
@@ -133,7 +133,7 @@ define amdgpu_kernel void @local_align2_load_store_i32(ptr addrspace(3) %p, ptr
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
-define amdgpu_kernel void @local_unaligned_load_store_i64(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i64(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load i64, ptr addrspace(3) %p, align 1
store i64 %v, ptr addrspace(3) %r, align 1
ret void
@@ -180,7 +180,7 @@ define amdgpu_kernel void @local_unaligned_load_store_i64(ptr addrspace(3) %p, p
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
-define amdgpu_kernel void @local_unaligned_load_store_v2i32(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_v2i32(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load <2 x i32>, ptr addrspace(3) %p, align 1
store <2 x i32> %v, ptr addrspace(3) %r, align 1
ret void
@@ -210,7 +210,7 @@ define amdgpu_kernel void @local_unaligned_load_store_v2i32(ptr addrspace(3) %p,
; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
-define amdgpu_kernel void @global_align2_load_store_i64(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_align2_load_store_i64(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load i64, ptr addrspace(1) %p, align 2
store i64 %v, ptr addrspace(1) %r, align 2
ret void
@@ -240,7 +240,7 @@ define amdgpu_kernel void @global_align2_load_store_i64(ptr addrspace(1) %p, ptr
; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
-define amdgpu_kernel void @unaligned_load_store_i64_global(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @unaligned_load_store_i64_global(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load i64, ptr addrspace(1) %p, align 1
store i64 %v, ptr addrspace(1) %r, align 1
ret void
@@ -287,7 +287,7 @@ define amdgpu_kernel void @unaligned_load_store_i64_global(ptr addrspace(1) %p,
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
-define amdgpu_kernel void @local_unaligned_load_store_v4i32(ptr addrspace(3) %p, ptr addrspace(3) %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_v4i32(ptr addrspace(3) %p, ptr addrspace(3) %r) nounwind {
%v = load <4 x i32>, ptr addrspace(3) %p, align 1
store <4 x i32> %v, ptr addrspace(3) %r, align 1
ret void
@@ -330,7 +330,7 @@ define amdgpu_kernel void @local_unaligned_load_store_v4i32(ptr addrspace(3) %p,
; UNALIGNED: buffer_load_dwordx4
; UNALIGNED: buffer_store_dwordx4
-define amdgpu_kernel void @global_unaligned_load_store_v4i32(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_v4i32(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%v = load <4 x i32>, ptr addrspace(1) %p, align 1
store <4 x i32> %v, ptr addrspace(1) %r, align 1
ret void
@@ -338,7 +338,7 @@ define amdgpu_kernel void @global_unaligned_load_store_v4i32(ptr addrspace(1) %p
; GCN-LABEL: {{^}}local_load_i64_align_4:
; GCN: ds_read2_b32
-define amdgpu_kernel void @local_load_i64_align_4(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) nounwind {
%val = load i64, ptr addrspace(3) %in, align 4
store i64 %val, ptr addrspace(1) %out, align 8
ret void
@@ -346,7 +346,7 @@ define amdgpu_kernel void @local_load_i64_align_4(ptr addrspace(1) nocapture %ou
; GCN-LABEL: {{^}}local_load_i64_align_4_with_offset
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-define amdgpu_kernel void @local_load_i64_align_4_with_offset(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4_with_offset(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) nounwind {
%ptr = getelementptr i64, ptr addrspace(3) %in, i32 4
%val = load i64, ptr addrspace(3) %ptr, align 4
store i64 %val, ptr addrspace(1) %out, align 8
@@ -357,7 +357,7 @@ define amdgpu_kernel void @local_load_i64_align_4_with_offset(ptr addrspace(1) n
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; GCN: s_endpgm
-define amdgpu_kernel void @local_load_i64_align_4_with_split_offset(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4_with_split_offset(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) nounwind {
%ptr255 = getelementptr i32, ptr addrspace(3) %in, i32 255
%val = load i64, ptr addrspace(3) %ptr255, align 4
store i64 %val, ptr addrspace(1) %out, align 8
@@ -374,7 +374,7 @@ define amdgpu_kernel void @local_load_i64_align_4_with_split_offset(ptr addrspac
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: store_dwordx2
-define amdgpu_kernel void @local_load_i64_align_1(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_1(ptr addrspace(1) nocapture %out, ptr addrspace(3) %in) nounwind {
%val = load i64, ptr addrspace(3) %in, align 1
store i64 %val, ptr addrspace(1) %out, align 8
ret void
@@ -382,7 +382,7 @@ define amdgpu_kernel void @local_load_i64_align_1(ptr addrspace(1) nocapture %ou
; GCN-LABEL: {{^}}local_store_i64_align_4:
; GCN: ds_write2_b32
-define amdgpu_kernel void @local_store_i64_align_4(ptr addrspace(3) %out, i64 %val) #0 {
+define amdgpu_kernel void @local_store_i64_align_4(ptr addrspace(3) %out, i64 %val) nounwind {
store i64 %val, ptr addrspace(3) %out, align 4
ret void
}
@@ -390,7 +390,7 @@ define amdgpu_kernel void @local_store_i64_align_4(ptr addrspace(3) %out, i64 %v
; GCN-LABEL: {{^}}local_store_i64_align_4_with_offset
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; GCN: s_endpgm
-define amdgpu_kernel void @local_store_i64_align_4_with_offset(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @local_store_i64_align_4_with_offset(ptr addrspace(3) %out) nounwind {
%ptr = getelementptr i64, ptr addrspace(3) %out, i32 4
store i64 0, ptr addrspace(3) %ptr, align 4
ret void
@@ -400,7 +400,7 @@ define amdgpu_kernel void @local_store_i64_align_4_with_offset(ptr addrspace(3)
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN: s_endpgm
-define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(ptr addrspace(3) %out) nounwind {
%ptr255 = getelementptr i32, ptr addrspace(3) %out, i32 255
store i64 0, ptr addrspace(3) %out, align 4
ret void
@@ -415,7 +415,7 @@ define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(ptr addrspa
; UNALIGNED: s_load_dword
; SI: buffer_store_dword
-define amdgpu_kernel void @constant_unaligned_load_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i32, ptr addrspace(4) %p, align 1
store i32 %v, ptr addrspace(1) %r, align 4
ret void
@@ -427,7 +427,7 @@ define amdgpu_kernel void @constant_unaligned_load_i32(ptr addrspace(4) %p, ptr
; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
-define amdgpu_kernel void @constant_align2_load_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i32, ptr addrspace(4) %p, align 2
store i32 %v, ptr addrspace(1) %r, align 4
ret void
@@ -441,7 +441,7 @@ define amdgpu_kernel void @constant_align2_load_i32(ptr addrspace(4) %p, ptr add
; UNALIGNED: s_load_dwordx4
; UNALIGNED: buffer_store_dwordx2
-define amdgpu_kernel void @constant_align2_load_i64(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i64(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i64, ptr addrspace(4) %p, align 2
store i64 %v, ptr addrspace(1) %r, align 4
ret void
@@ -450,7 +450,7 @@ define amdgpu_kernel void @constant_align2_load_i64(ptr addrspace(4) %p, ptr add
; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @constant_align4_load_i64(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align4_load_i64(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i64, ptr addrspace(4) %p, align 4
store i64 %v, ptr addrspace(1) %r, align 4
ret void
@@ -459,7 +459,7 @@ define amdgpu_kernel void @constant_align4_load_i64(ptr addrspace(4) %p, ptr add
; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
-define amdgpu_kernel void @constant_align4_load_v4i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align4_load_v4i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load <4 x i32>, ptr addrspace(4) %p, align 4
store <4 x i32> %v, ptr addrspace(1) %r, align 4
ret void
@@ -479,7 +479,7 @@ define amdgpu_kernel void @constant_align4_load_v4i32(ptr addrspace(4) %p, ptr a
; UNALIGNED: buffer_load_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @constant_unaligned_load_v2i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_v2i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load <2 x i32>, ptr addrspace(4) %p, align 1
store <2 x i32> %v, ptr addrspace(1) %r, align 4
ret void
@@ -509,7 +509,7 @@ define amdgpu_kernel void @constant_unaligned_load_v2i32(ptr addrspace(4) %p, pt
; UNALIGNED: buffer_load_dwordx4
; SI: buffer_store_dwordx4
-define amdgpu_kernel void @constant_unaligned_load_v4i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_v4i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load <4 x i32>, ptr addrspace(4) %p, align 1
store <4 x i32> %v, ptr addrspace(1) %r, align 4
ret void
@@ -518,7 +518,7 @@ define amdgpu_kernel void @constant_unaligned_load_v4i32(ptr addrspace(4) %p, pt
; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: s_load_dword
; SI: buffer_store_byte
-define amdgpu_kernel void @constant_align4_load_i8(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align4_load_i8(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i8, ptr addrspace(4) %p, align 4
store i8 %v, ptr addrspace(1) %r, align 4
ret void
@@ -527,7 +527,7 @@ define amdgpu_kernel void @constant_align4_load_i8(ptr addrspace(4) %p, ptr addr
; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
-define amdgpu_kernel void @constant_align2_load_i8(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i8(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%v = load i8, ptr addrspace(4) %p, align 2
store i8 %v, ptr addrspace(1) %r, align 2
ret void
@@ -538,7 +538,7 @@ define amdgpu_kernel void @constant_align2_load_i8(ptr addrspace(4) %p, ptr addr
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @constant_align4_merge_load_2_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @constant_align4_merge_load_2_i32(ptr addrspace(4) %p, ptr addrspace(1) %r) nounwind {
%gep0 = getelementptr i32, ptr addrspace(4) %p, i64 1
%v0 = load i32, ptr addrspace(4) %p, align 4
%v1 = load i32, ptr addrspace(4) %gep0, align 4
@@ -568,7 +568,7 @@ define amdgpu_kernel void @constant_align4_merge_load_2_i32(ptr addrspace(4) %p,
; SI: ds_read_u8
; SI: ScratchSize: 0{{$}}
-define amdgpu_kernel void @local_load_align1_v16i8(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
+define amdgpu_kernel void @local_load_align1_v16i8(ptr addrspace(1) %out, ptr addrspace(3) %in) nounwind {
%ld = load <16 x i8>, ptr addrspace(3) %in, align 1
store <16 x i8> %ld, ptr addrspace(1) %out
ret void
@@ -593,7 +593,7 @@ define amdgpu_kernel void @local_load_align1_v16i8(ptr addrspace(1) %out, ptr ad
; SI: ds_write_b8
; SI: ScratchSize: 0{{$}}
-define amdgpu_kernel void @local_store_align1_v16i8(ptr addrspace(3) %out) #0 {
+define amdgpu_kernel void @local_store_align1_v16i8(ptr addrspace(3) %out) nounwind {
store <16 x i8> zeroinitializer, ptr addrspace(3) %out, align 1
ret void
}
@@ -623,7 +623,7 @@ define double @private_load_align1_f64(ptr addrspace(5) %in) {
; MUBUF: buffer_store_byte
; MUBUF: buffer_store_byte
; FLATSCR: scratch_store_dwordx2
-define void @private_store_align1_f64(ptr addrspace(5) %out, double %x) #0 {
+define void @private_store_align1_f64(ptr addrspace(5) %out, double %x) nounwind {
store double %x, ptr addrspace(5) %out, align 1
ret void
}
@@ -641,7 +641,7 @@ define double @private_load_align4_f64(ptr addrspace(5) %in) {
; MUBUF: buffer_store_dword
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dwordx2
-define void @private_store_align4_f64(ptr addrspace(5) %out, double %x) #0 {
+define void @private_store_align4_f64(ptr addrspace(5) %out, double %x) nounwind {
store double %x, ptr addrspace(5) %out, align 4
ret void
}
@@ -663,13 +663,13 @@ define double @private_load_align2_f64(ptr addrspace(5) %in) {
; MUBUF: buffer_store_short
; MUBUF: buffer_store_short
; FLATSCR: scratch_store_dwordx2
-define void @private_store_align2_f64(ptr addrspace(5) %out, double %x) #0 {
+define void @private_store_align2_f64(ptr addrspace(5) %out, double %x) nounwind {
store double %x, ptr addrspace(5) %out, align 2
ret void
}
; Should not merge this to a dword store
-define amdgpu_kernel void @global_store_2xi16_align2(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
+define amdgpu_kernel void @global_store_2xi16_align2(ptr addrspace(1) %p, ptr addrspace(1) %r) nounwind {
%gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
%v = load i16, ptr addrspace(1) %p, align 2
store i16 1, ptr addrspace(1) %r, align 2
@@ -678,7 +678,7 @@ define amdgpu_kernel void @global_store_2xi16_align2(ptr addrspace(1) %p, ptr ad
}
; Should not merge this to a word load
-define i32 @load_2xi16_align2(ptr addrspace(1) %p) #0 {
+define i32 @load_2xi16_align2(ptr addrspace(1) %p) nounwind {
%gep.p = getelementptr i16, ptr addrspace(1) %p, i64 1
%p.0 = load i16, ptr addrspace(1) %p, align 2
%p.1 = load i16, ptr addrspace(1) %gep.p, align 2
@@ -688,5 +688,3 @@ define i32 @load_2xi16_align2(ptr addrspace(1) %p) #0 {
%or = or i32 %zext.0, %shl.1
ret i32 %or
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index f9853017b9d3fe..139e91631968ef 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -1,15 +1,13 @@
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -run-pass si-optimize-exec-masking -verify-machineinstrs -o - %s | FileCheck %s
--- |
- define amdgpu_kernel void @undefined_physreg_sgpr_spill() #0 {
+ define amdgpu_kernel void @undefined_physreg_sgpr_spill() nounwind "amdgpu-num-sgpr"="16" {
unreachable
}
- define amdgpu_kernel void @undefined_physreg_sgpr_spill_reorder() #0 {
+ define amdgpu_kernel void @undefined_physreg_sgpr_spill_reorder() nounwind "amdgpu-num-sgpr"="16" {
unreachable
}
- attributes #0 = { nounwind "amdgpu-num-sgpr"="16" }
-
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 0acee5bd5ac19d..9ef6e6de374e26 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -5,7 +5,7 @@
; We may have subregister live ranges that are undefined on some paths. The
; verifier should not complain about this.
-define amdgpu_kernel void @func() #0 {
+define amdgpu_kernel void @func() nounwind {
; CHECK-LABEL: func:
; CHECK: ; %bb.0: ; %B0
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -40,7 +40,7 @@ B30.2:
; FIXME: Extra undef subregister copy should be removed before
; overwritten with defined copy
-define amdgpu_ps float @valley_partially_undef_copy() #0 {
+define amdgpu_ps float @valley_partially_undef_copy() nounwind {
; CHECK-LABEL: valley_partially_undef_copy:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_mov_b32 s3, 0xf000
@@ -90,7 +90,7 @@ bb11: ; preds = %bb9
}
; FIXME: Should be able to remove the undef copies
-define amdgpu_kernel void @partially_undef_copy() #0 {
+define amdgpu_kernel void @partially_undef_copy() nounwind {
; CHECK-LABEL: partially_undef_copy:
; CHECK: ; %bb.0:
; CHECK-NEXT: ;;#ASMSTART
@@ -123,7 +123,4 @@ define amdgpu_kernel void @partially_undef_copy() #0 {
ret void
}
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
index 8a85e1e78ce7dd..4035890f5d17bb 100644
--- a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
+++ b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
@@ -5,7 +5,7 @@
; SI hits an assertion at -O0, evergreen hits a not implemented unreachable.
; COMMON-LABEL: {{^}}branch_true:
-define amdgpu_kernel void @branch_true(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_true(ptr addrspace(1) nocapture %main, i32 %main_stride) nounwind {
entry:
br i1 true, label %for.end, label %for.body.lr.ph
@@ -36,7 +36,7 @@ for.end: ; preds = %for.body, %entry
; COMMON-LABEL: {{^}}branch_false:
; SI: s_cbranch_scc1
; SI: s_endpgm
-define amdgpu_kernel void @branch_false(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_false(ptr addrspace(1) nocapture %main, i32 %main_stride) nounwind {
entry:
br i1 false, label %for.end, label %for.body.lr.ph
@@ -68,7 +68,7 @@ for.end: ; preds = %for.body, %entry
; SI: s_cbranch_scc1
; SI: s_cbranch_scc1
; SI: s_endpgm
-define amdgpu_kernel void @branch_undef(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_undef(ptr addrspace(1) nocapture %main, i32 %main_stride) nounwind {
entry:
br i1 undef, label %for.end, label %for.body.lr.ph
@@ -95,5 +95,3 @@ for.body: ; preds = %for.body, %for.body
for.end: ; preds = %for.body, %entry
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
index 7417f865b51734..0e4e4e13c41556 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
@@ -14,7 +14,7 @@ main_body:
if:
%u = fadd float %v, %v
- call void asm sideeffect "", ""() #0 ; Prevent ifconversion
+ call void asm sideeffect "", ""() nounwind memory(argmem: read) ; Prevent ifconversion
br label %else
else:
@@ -22,6 +22,4 @@ else:
ret float %r
}
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind memory(argmem: read) }
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) nounwind memory(argmem: read)
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index f60a274f1e592b..ef730472228272 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -539,7 +539,7 @@ define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, p
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%cmp0 = icmp sgt i32 %cond0, 0
br i1 %cmp0, label %bb2, label %bb9
@@ -642,7 +642,7 @@ define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%d_cmp = icmp ult i32 %tid, 16
br i1 %d_cmp, label %if, label %endif
@@ -711,7 +711,7 @@ entry:
if:
store i32 0, ptr addrspace(1) %out
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%d_cmp = icmp ult i32 %tid, 16
br i1 %d_cmp, label %if_uniform, label %endif
@@ -779,7 +779,7 @@ define amdgpu_kernel void @divergent_if_uniform_if(ptr addrspace(1) %out, i32 %c
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%d_cmp = icmp eq i32 %tid, 0
br i1 %d_cmp, label %if, label %endif
@@ -845,7 +845,7 @@ define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, ptr
; VI-NEXT: .LBB14_2: ; %bb9
; VI-NEXT: s_endpgm
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp1 = icmp sgt i32 %cond, 0
br i1 %tmp1, label %bb2, label %bb9
@@ -1198,8 +1198,6 @@ bb3: ; preds = %bb2, %bb1
br label %bb1
}
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
index 426c83d0014981..c51ae9d513411a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
@@ -40,7 +40,7 @@ bb3: ; preds = %bb3, %bb2
br i1 %tmp4, label %bb5, label %bb3
bb5: ; preds = %bb3, %bb
- %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() #1
+ %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
%tmp10 = icmp ult i32 %tmp6, %arg
br i1 %tmp10, label %bb11, label %bb12
@@ -52,6 +52,4 @@ bb12: ; preds = %bb11, %bb5
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index 3597d9a7010d35..063b6e0c3f4342 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -35,7 +35,7 @@ out:
; CHECK-NEXT: s_cbranch_execz
define amdgpu_kernel void @test2(ptr addrspace(1) %out, i32 %a, i32 %b) {
main_body:
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
%cc = icmp eq i32 %tid, 0
br i1 %cc, label %done1, label %if
@@ -61,6 +61,4 @@ done1:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #1 = { nounwind readonly }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
index 5386ef425dcb58..7cde39beb1a343 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
@@ -8,7 +8,7 @@
; with %c (thus replacing %c2 with %c in this example).
-define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, i32 %y) #0 {
+define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x, i32 %y) nounwind optnone noinline {
; GCN-LABEL: uniform_phi_with_undef:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_cmp_lt_i32_e64 s2, v2, v1
@@ -48,5 +48,3 @@ end:
%r = fadd float %v2, %c2
ret float %r
}
-
-attributes #0 = { nounwind optnone noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index 049db01badacf9..a431af1afbed0b 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -9,7 +9,7 @@
;.
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
;.
-define void @foo() #0 {
+define void @foo() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: store i32 0, ptr @x, align 4
@@ -28,8 +28,6 @@ define amdgpu_kernel void @kernel1() #1 {
call void @foo()
ret void
}
-
-attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index c9387f196dff96..47a41806ef6b7a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -29,7 +29,7 @@ define internal void @internal1() {
ret void
}
-define amdgpu_kernel void @kernel1() #0 {
+define amdgpu_kernel void @kernel1() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: call void @weak()
@@ -85,7 +85,7 @@ define internal void @internal2() {
ret void
}
-define amdgpu_kernel void @kernel2() #0 {
+define amdgpu_kernel void @kernel2() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: call void @internal2()
@@ -94,8 +94,6 @@ define amdgpu_kernel void @kernel2() #0 {
call void @internal2()
ret void
}
-
-attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index 7183da2c5efc36..dd40704bd039d7 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -29,7 +29,7 @@ define void @func2() #1 {
ret void
}
-define amdgpu_kernel void @kernel3() #2 {
+define amdgpu_kernel void @kernel3() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel3
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @func2()
@@ -38,8 +38,6 @@ define amdgpu_kernel void @kernel3() #2 {
call void @func2()
ret void
}
-
-attributes #2 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 6ed04cf63d20be..8e8738d55eea0d 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -19,7 +19,7 @@ define void @func() #0 {
ret void
}
-define amdgpu_kernel void @kernel1() #1 {
+define amdgpu_kernel void @kernel1() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @func()
@@ -38,8 +38,6 @@ define amdgpu_kernel void @kernel2() #2 {
call void @func()
ret void
}
-
-attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index d5ba2fd617c6ef..1ec56e2ff1e923 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -7,7 +7,7 @@
;.
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
;.
-define void @func() #0 {
+define void @func() nounwind {
; CHECK-LABEL: define {{[^@]+}}@func
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: store i32 0, ptr @x, align 4
@@ -17,7 +17,7 @@ define void @func() #0 {
ret void
}
-define amdgpu_kernel void @kernel1() #1 {
+define amdgpu_kernel void @kernel1() "uniform-work-group-size"="false" {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @func()
@@ -28,7 +28,7 @@ define amdgpu_kernel void @kernel1() #1 {
}
; External declaration of a function
-define weak_odr void @weak_func() #0 {
+define weak_odr void @weak_func() nounwind {
; CHECK-LABEL: define {{[^@]+}}@weak_func
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: store i32 0, ptr @x, align 4
@@ -38,7 +38,7 @@ define weak_odr void @weak_func() #0 {
ret void
}
-define amdgpu_kernel void @kernel2() #2 {
+define amdgpu_kernel void @kernel2() "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: call void @weak_func()
@@ -47,10 +47,6 @@ define amdgpu_kernel void @kernel2() #2 {
call void @weak_func()
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { "uniform-work-group-size"="false" }
-attributes #2 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 7f0dfeaf75c808..b39f92b363c3c3 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -4,7 +4,7 @@
; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers
-define i32 @fib(i32 %n) #0 {
+define i32 @fib(i32 %n) nounwind readnone {
; CHECK-LABEL: define {{[^@]+}}@fib
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
@@ -42,7 +42,7 @@ exit:
ret i32 1
}
-define internal i32 @fib_internal(i32 %n) #0 {
+define internal i32 @fib_internal(i32 %n) nounwind readnone {
; CHECK-LABEL: define {{[^@]+}}@fib_internal
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
@@ -80,7 +80,7 @@ exit:
ret i32 1
}
-define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
+define amdgpu_kernel void @kernel(ptr addrspace(1) %m) "uniform-work-group-size"="true" {
; CHECK-LABEL: define {{[^@]+}}@kernel
; CHECK-SAME: (ptr addrspace(1) [[M:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
@@ -98,8 +98,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
}
; nounwind and readnone are added to match attributor results.
-attributes #0 = { nounwind readnone }
-attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index 8616c73ad51c13..6d9be932b72b92 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -25,7 +25,7 @@ define void @func4() {
ret void
}
-define void @func2() #0 {
+define void @func2() "uniform-work-group-size"="false" {
; CHECK-LABEL: define {{[^@]+}}@func2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: call void @func4()
@@ -47,7 +47,7 @@ define void @func3() {
ret void
}
-define amdgpu_kernel void @kernel3() #0 {
+define amdgpu_kernel void @kernel3() "uniform-work-group-size"="false" {
; CHECK-LABEL: define {{[^@]+}}@kernel3
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @func2()
@@ -58,8 +58,6 @@ define amdgpu_kernel void @kernel3() #0 {
call void @func3()
ret void
}
-
-attributes #0 = { "uniform-work-group-size"="false" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
index fd0c74a192d386..48e378867005bd 100644
--- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
@@ -9,14 +9,14 @@
;
; Check for a valid output.
; CHECK: image_sample_c
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main(ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg1, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg2, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg3, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main(ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg1, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg2, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg3, ptr addrspace(4) inreg dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" {
main_body:
%i.i = extractelement <2 x i32> %arg8, i32 0
%j.i = extractelement <2 x i32> %arg8, i32 1
%i.f.i = bitcast i32 %i.i to float
%j.f.i = bitcast i32 %j.i to float
- %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
- %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
+ %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) nounwind readonly
+ %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) nounwind readonly
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp24 = extractelement <4 x float> %tmp23, i32 3
@@ -97,14 +97,9 @@ ENDIF28: ; preds = %LOOP
br label %LOOP
}
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
-attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind }
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/unpack-half.ll b/llvm/test/CodeGen/AMDGPU/unpack-half.ll
index f1dbc7f07f5958..5ad575ce116367 100644
--- a/llvm/test/CodeGen/AMDGPU/unpack-half.ll
+++ b/llvm/test/CodeGen/AMDGPU/unpack-half.ll
@@ -8,7 +8,7 @@
; CHECK-LABEL: {{^}}main:
; CHECK: v_cvt_f32_f16
-define amdgpu_gs void @main(i32 inreg %arg) local_unnamed_addr #0 {
+define amdgpu_gs void @main(i32 inreg %arg) local_unnamed_addr nounwind {
.entry:
%tmp = load volatile float, ptr addrspace(1) undef
%tmp1 = bitcast float %tmp to i32
@@ -22,5 +22,3 @@ define amdgpu_gs void @main(i32 inreg %arg) local_unnamed_addr #0 {
store volatile i32 %tmp6, ptr addrspace(1) undef
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
index 95fb8a802277f8..dc90122865715d 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
@@ -14,6 +14,4 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
index 57ba4111b353b6..7eee5158031731 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
@@ -16,6 +16,4 @@ main_body:
ret <4 x float> %v
}
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-
-attributes #0 = { nounwind readonly }
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index a53532f53e0200..d3ff0d50efba36 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -1,17 +1,17 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-declare float @llvm.fma.f32(float, float, float) #1
-declare double @llvm.fma.f64(double, double, double) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_binop(ptr addrspace(1) %out, float %a) #0 {
+define amdgpu_kernel void @test_sgpr_use_twice_binop(ptr addrspace(1) %out, float %a) nounwind {
%dbl = fadd float %a, %a
store float %dbl, ptr addrspace(1) %out, align 4
ret void
@@ -21,8 +21,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_binop(ptr addrspace(1) %out, floa
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_three_ternary_op(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
+define amdgpu_kernel void @test_sgpr_use_three_ternary_op(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float %a) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -33,8 +33,8 @@ define amdgpu_kernel void @test_sgpr_use_three_ternary_op(ptr addrspace(1) %out,
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[#LOAD + 3]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[#LOAD + 2]], s[[#LOAD + 2]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float %b) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -62,11 +62,11 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(ptr addrspace(1)
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[#LOAD + 2]], [[VA1]], [[VB]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
-define amdgpu_kernel void @test_use_s_v_s(ptr addrspace(1) %out, float %a, float %b, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_use_s_v_s(ptr addrspace(1) %out, float %a, float %b, ptr addrspace(1) %in) nounwind {
%va0 = load volatile float, ptr addrspace(1) %in
%va1 = load volatile float, ptr addrspace(1) %in
- %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1
- %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1
+ %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) nounwind readnone
+ %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) nounwind readnone
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
@@ -78,8 +78,8 @@ define amdgpu_kernel void @test_use_s_v_s(ptr addrspace(1) %out, float %a, float
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[#LOAD + 3]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], s[[#LOAD + 2]], [[VGPR1]], s[[#LOAD + 2]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float %b, float %a) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -90,8 +90,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(ptr addrspace(1)
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s[[#LOAD + 3]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], s[[#LOAD + 2]], s[[#LOAD + 2]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma = call float @llvm.fma.f32(float %b, float %a, float %a) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -100,8 +100,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(ptr addrspace(1)
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_imm(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_imm(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -110,8 +110,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_imm(ptr addrspace(
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_imm_a(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_imm_a(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -121,8 +121,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_imm_a(ptr addrspace(
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_imm_a_a(ptr addrspace(1) %out, float %a) #0 {
- %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_imm_a_a(ptr addrspace(1) %out, float %a) nounwind {
+ %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) nounwind readnone
store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -132,8 +132,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_imm_a_a(ptr addrspace(
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_kimm(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
+define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_kimm(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) nounwind readnone
store float %fma, ptr addrspace(1) %out, align 4
ret void
}
@@ -144,8 +144,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_kimm(ptr addrspace
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[SK]], [[SK]], [[VGPR]]
; GCN: buffer_store_dword [[RESULT0]]
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) nounwind readnone
store float %fma, ptr addrspace(1) %out
ret void
}
@@ -160,9 +160,9 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s(ptr addrspace
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s_x2(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
- %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s_x2(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) nounwind readnone
+ %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) nounwind readnone
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
@@ -174,8 +174,8 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s_x2(ptr addrsp
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR]], [[SK]], [[SK]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) nounwind readnone
store float %fma, ptr addrspace(1) %out
ret void
}
@@ -190,9 +190,9 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k(ptr addrspace
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k_x2(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
- %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k_x2(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) nounwind readnone
+ %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) nounwind readnone
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
@@ -204,8 +204,8 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k_x2(ptr addrsp
; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR]], [[SK]], [[SK]]
; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k(ptr addrspace(1) %out, float %a) #0 {
- %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k(ptr addrspace(1) %out, float %a) nounwind {
+ %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) nounwind readnone
store float %fma, ptr addrspace(1) %out
ret void
}
@@ -220,9 +220,9 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k(ptr addrspace
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
-define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
- %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
+define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) nounwind readnone
+ %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) nounwind readnone
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
@@ -240,9 +240,9 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(ptr addrsp
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
-define amdgpu_kernel void @test_s0_s1_k_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
- %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
- %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
+define amdgpu_kernel void @test_s0_s1_k_f32(ptr addrspace(1) %out, float %a, float %b) nounwind {
+ %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) nounwind readnone
+ %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) nounwind readnone
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
@@ -264,13 +264,10 @@ define amdgpu_kernel void @test_s0_s1_k_f32(ptr addrspace(1) %out, float %a, flo
; GCN: buffer_store_dwordx2 [[RESULT0]]
; GCN: buffer_store_dwordx2 [[RESULT1]]
-define amdgpu_kernel void @test_s0_s1_k_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) #0 {
- %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
- %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
+define amdgpu_kernel void @test_s0_s1_k_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
+ %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) nounwind readnone
+ %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) nounwind readnone
store volatile double %fma0, ptr addrspace(1) %out
store volatile double %fma1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll
index 666ae7c126ae3e..82deaad979f373 100644
--- a/llvm/test/CodeGen/AMDGPU/usubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubo.ll
@@ -4,7 +4,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
-define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
; SI-LABEL: s_usubo_i64_zext:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -62,7 +62,7 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX9-NEXT: s_endpgm
- %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0
+ %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
%carry = extractvalue { i64, i1 } %usub, 1
%ext = zext i1 %carry to i64
@@ -72,7 +72,7 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
}
; FIXME: Could do scalar
-define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_usubo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -129,7 +129,7 @@ define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_usubo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -207,7 +207,7 @@ define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_usubo_i32_novcc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -293,12 +293,12 @@ define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
%val = extractvalue { i32, i1 } %uadd, 0
%carry = extractvalue { i32, i1 } %uadd, 1
store volatile i32 %val, ptr addrspace(1) %out, align 4
- call void asm sideeffect "", "~{vcc}"() #0
+ call void asm sideeffect "", "~{vcc}"() nounwind
store volatile i1 %carry, ptr addrspace(1) %carryout
ret void
}
-define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 {
+define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind {
; SI-LABEL: s_usubo_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -367,7 +367,7 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_usubo_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -451,7 +451,7 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_usubo_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
@@ -615,7 +615,7 @@ define amdgpu_kernel void @v_usubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @s_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
; SI-LABEL: s_usubo_clamp_bit:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
@@ -704,7 +704,7 @@ exit:
}
-define amdgpu_kernel void @v_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
+define amdgpu_kernel void @v_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) nounwind {
; SI-LABEL: v_usubo_clamp_bit:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9
@@ -809,11 +809,8 @@ exit:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) #1
-declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
-declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare { <2 x i32>, <2 x i1> } @llvm.usub.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 8cc7025d671c47..7e5b85405bcfee 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -705,17 +705,15 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
ret i64 %result
}
-declare i8 @llvm.usub.sat.i8(i8, i8) #0
-declare i16 @llvm.usub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.usub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.usub.sat.i64(i64, i64) #0
-
-attributes #0 = { nounwind readnone speculatable willreturn }
+declare i8 @llvm.usub.sat.i8(i8, i8) nounwind readnone speculatable willreturn
+declare i16 @llvm.usub.sat.i16(i16, i16) nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) nounwind readnone speculatable willreturn
+declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) nounwind readnone speculatable willreturn
+declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone speculatable willreturn
+declare i32 @llvm.usub.sat.i32(i32, i32) nounwind readnone speculatable willreturn
+declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone speculatable willreturn
+declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) nounwind readnone speculatable willreturn
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone speculatable willreturn
+declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>) nounwind readnone speculatable willreturn
+declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) nounwind readnone speculatable willreturn
+declare i64 @llvm.usub.sat.i64(i64, i64) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
index 2fa9750653b6d2..a86bf2b505f3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
@@ -63,10 +63,8 @@ define amdgpu_kernel void @ballot_test(half %x, half %y) {
ret void
}
-declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg) #0
+declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg) convergent nounwind readnone willreturn
-declare i64 @llvm.amdgcn.fcmp.i64.f16(half, half, i32 immarg) #0
+declare i64 @llvm.amdgcn.fcmp.i64.f16(half, half, i32 immarg) convergent nounwind readnone willreturn
-declare i64 @llvm.amdgcn.ballot.i64(i1) #0
-
-attributes #0 = { convergent nounwind readnone willreturn }
+declare i64 @llvm.amdgcn.ballot.i64(i1) convergent nounwind readnone willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index fc6df735c05b0f..c980e898faf493 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -4,13 +4,13 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
; All nan values are converted to 0xffffffff
-define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 {
+define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) nounwind {
; SI-LABEL: v_cnd_nan_nosgpr:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -90,7 +90,7 @@ define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr a
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx
%f = load float, ptr addrspace(1) %f.gep
%setcc = icmp ne i32 %c, 0
@@ -104,7 +104,7 @@ define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr a
; never be moved.
; However on GFX10 constant bus is limited to 2 scalar operands, not one.
; All nan values are converted to 0xffffffff
-define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0 {
+define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) nounwind {
; SI-LABEL: v_cnd_nan:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -166,7 +166,7 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0
; Test different compare and select operand types for optimal code
; shrinking.
; (select (cmp (sgprX, constant)), constant, sgprZ)
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k1_sgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -223,7 +223,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%setcc = fcmp one float %x, 0.0
@@ -232,7 +232,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %out, float %x) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k1_sgprX_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -289,7 +289,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%setcc = fcmp one float %x, 0.0
@@ -298,7 +298,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %out, [8 x i32], float %x, float %z) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k0_sgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -355,7 +355,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%setcc = fcmp one float %x, 0.0
@@ -364,7 +364,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %out, float %x) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k0_sgprX_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -421,7 +421,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%setcc = fcmp one float %x, 0.0
@@ -430,7 +430,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k0_vgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -504,7 +504,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -515,7 +515,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, float %x, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_sgprX_k0_select_k1_vgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -589,7 +589,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -600,7 +600,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, float %z) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, float %z) nounwind {
; SI-LABEL: fcmp_vgprX_k0_select_k1_sgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -667,7 +667,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
@@ -678,7 +678,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -757,7 +757,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -770,7 +770,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -849,7 +849,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds i32, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -862,7 +862,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -945,7 +945,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i64, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds i64, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -958,7 +958,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o
ret void
}
-define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1054,7 +1054,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1067,7 +1067,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1163,7 +1163,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1178,7 +1178,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1)
; This must be swapped as a vector type before the condition has
; multiple uses.
-define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1274,7 +1274,7 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1287,7 +1287,7 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1)
ret void
}
-define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds i1, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1395,7 +1395,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou
}
; Different types compared vs. selected
-define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1485,7 +1485,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds double, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1499,7 +1499,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1)
}
; Different types compared vs. selected
-define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1587,7 +1587,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds i64, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1601,7 +1601,7 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1)
}
; Different types compared vs. selected
-define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1680,7 +1680,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1694,7 +1694,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1)
}
; FIXME: Should be able to handle multiple uses
-define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) #0 {
+define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr addrspace(1) %out, ptr addrspace(1) %x.ptr, ptr addrspace(1) %z.ptr) nounwind {
; SI-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1789,7 +1789,7 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tid.ext = sext i32 %tid to i64
%x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext
%z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext
@@ -1805,7 +1805,7 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add
}
; Source modifiers abs/neg only work for f32
-define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 {
+define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) nounwind {
; SI-LABEL: v_cndmask_abs_neg_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[0:1], 0xb
@@ -1896,7 +1896,7 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%f.gep = getelementptr half, ptr addrspace(1) %fptr, i32 %idx
%f = load half, ptr addrspace(1) %f.gep
%f.abs = call half @llvm.fabs.f16(half %f)
@@ -1907,7 +1907,7 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c,
ret void
}
-define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 {
+define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) nounwind {
; SI-LABEL: v_cndmask_abs_neg_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx
%f = load float, ptr addrspace(1) %f.gep
%f.abs = call float @llvm.fabs.f32(float %f)
@@ -1998,7 +1998,7 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c,
ret void
}
-define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 {
+define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) nounwind {
; SI-LABEL: v_cndmask_abs_neg_f64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s8, s[0:1], 0xb
@@ -2092,7 +2092,7 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c,
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %idx = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%f.gep = getelementptr double, ptr addrspace(1) %fptr, i32 %idx
%f = load double, ptr addrspace(1) %f.gep
%f.abs = call double @llvm.fabs.f64(double %f)
@@ -2102,6 +2102,3 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c,
store double %select, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
index 5a4d0792da2031..d92036f30757f0 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
@@ -1,12 +1,12 @@
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) #0
+declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) nounwind readnone
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_0:
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_0(ptr addrspace(1) %out, float %src, i32 %reg) {
- %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
+ %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -14,7 +14,7 @@ define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_0(ptr addrspace(1) %out, float %s
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_1:
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_1(ptr addrspace(1) %out, float %src, i32 %reg) {
- %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %reg) #0
+ %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %reg) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -22,7 +22,7 @@ define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_1(ptr addrspace(1) %out, float %s
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_2:
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_2(ptr addrspace(1) %out, float %src, i32 %reg) {
- %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %reg) #0
+ %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %reg) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -30,7 +30,7 @@ define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_2(ptr addrspace(1) %out, float %s
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_3:
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_3(ptr addrspace(1) %out, float %src, i32 %reg) {
- %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %reg) #0
+ %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %reg) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -41,10 +41,10 @@ define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_3(ptr addrspace(1) %out, float %s
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_combine(ptr addrspace(1) %out, float %src, i32 %reg) {
- %result0 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
- %result1 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %result0) #0
- %result2 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %result1) #0
- %result3 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %result2) #0
+ %result0 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) nounwind readnone
+ %result1 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %result0) nounwind readnone
+ %result2 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %result1) nounwind readnone
+ %result3 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %result2) nounwind readnone
store i32 %result3, ptr addrspace(1) %out, align 4
ret void
}
@@ -52,9 +52,7 @@ define amdgpu_kernel void @v_cvt_pk_u8_f32_combine(ptr addrspace(1) %out, float
; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx:
; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @v_cvt_pk_u8_f32_idx(ptr addrspace(1) %out, float %src, i32 %idx, i32 %reg) {
- %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 %idx, i32 %reg) #0
+ %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 %idx, i32 %reg) nounwind readnone
store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 7fe33d5bd5f728..b0d148c1674fa3 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -8,7 +8,7 @@
; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
; GCN: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN: buffer_store_dword [[C]]
-define amdgpu_kernel void @mac_vvv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @mac_vvv(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -26,7 +26,7 @@ entry:
; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5
-define amdgpu_kernel void @mad_inline_sgpr_inline(ptr addrspace(1) %out, float %in) #0 {
+define amdgpu_kernel void @mad_inline_sgpr_inline(ptr addrspace(1) %out, float %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%tmp0 = fmul float 0.5, %in
%tmp1 = fadd float %tmp0, 0.5
@@ -37,7 +37,7 @@ entry:
; GCN-LABEL: {{^}}mad_vvs:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @mad_vvs(ptr addrspace(1) %out, ptr addrspace(1) %in, float %c) #0 {
+define amdgpu_kernel void @mad_vvs(ptr addrspace(1) %out, ptr addrspace(1) %in, float %c) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
@@ -52,7 +52,7 @@ entry:
; GCN-LABEL: {{^}}mac_ssv:
; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @mac_ssv(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) #0 {
+define amdgpu_kernel void @mac_ssv(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%c = load float, ptr addrspace(1) %in
@@ -65,7 +65,7 @@ entry:
; GCN-LABEL: {{^}}mac_mad_same_add:
; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mac_mad_same_add(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @mac_mad_same_add(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -96,7 +96,7 @@ entry:
; GCN-LABEL: {{^}}mad_neg_src0:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @mad_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -116,7 +116,7 @@ entry:
; GCN-LABEL: {{^}}nsz_mad_sub0_src0:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="true" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -136,7 +136,7 @@ entry:
; GCN-LABEL: {{^}}safe_mad_sub0_src0:
; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,
; GCN: v_ma{{[cd]}}_f32{{[_e32]*}} v{{[0-9]+}}, [[SUB0]], v{{[0-9]+}}
-define amdgpu_kernel void @safe_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @safe_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -156,7 +156,7 @@ entry:
; GCN-LABEL: {{^}}mad_neg_src1:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @mad_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -176,7 +176,7 @@ entry:
; GCN-LABEL: {{^}}nsz_mad_sub0_src1:
; GCN-NOT: v_mac_f32
; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="true" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -196,7 +196,7 @@ entry:
; GCN-LABEL: {{^}}mad_neg_src2:
; GCN-NOT: v_mac
; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @mad_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind "no-signed-zeros-fp-math"="false" {
entry:
%b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
%c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
@@ -222,7 +222,7 @@ entry:
; GCN: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; GCN: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #3 {
+define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -257,7 +257,7 @@ bb:
; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f16(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #3 {
+define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f16(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -307,9 +307,4 @@ define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-m
ret float %mad
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index d7a837a6e5f718..47863dda6d54bd 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @mac_f16(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -45,7 +45,7 @@ define amdgpu_kernel void @mac_f16_same_add(
ptr addrspace(1) %b,
ptr addrspace(1) %c,
ptr addrspace(1) %d,
- ptr addrspace(1) %e) #0 {
+ ptr addrspace(1) %e) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -77,7 +77,7 @@ define amdgpu_kernel void @mac_f16_neg_a(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -104,7 +104,7 @@ define amdgpu_kernel void @mac_f16_neg_b(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -131,7 +131,7 @@ define amdgpu_kernel void @mac_f16_neg_c(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -155,7 +155,7 @@ define amdgpu_kernel void @mac_f16_neg_a_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -179,7 +179,7 @@ define amdgpu_kernel void @mac_f16_neg_b_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -203,7 +203,7 @@ define amdgpu_kernel void @mac_f16_neg_c_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -230,7 +230,7 @@ define amdgpu_kernel void @mac_f16_neg_a_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -257,7 +257,7 @@ define amdgpu_kernel void @mac_f16_neg_b_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -284,7 +284,7 @@ define amdgpu_kernel void @mac_f16_neg_c_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -336,12 +336,12 @@ define amdgpu_kernel void @mac_v2f16(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
- call void @llvm.amdgcn.s.barrier() #2
+ call void @llvm.amdgcn.s.barrier() nounwind convergent
%b.val = load <2 x half>, ptr addrspace(1) %b
- call void @llvm.amdgcn.s.barrier() #2
+ call void @llvm.amdgcn.s.barrier() nounwind convergent
%c.val = load <2 x half>, ptr addrspace(1) %c
%t.val = fmul <2 x half> %a.val, %b.val
@@ -370,7 +370,7 @@ define amdgpu_kernel void @mac_v2f16_same_add(
ptr addrspace(1) %b,
ptr addrspace(1) %c,
ptr addrspace(1) %d,
- ptr addrspace(1) %e) #0 {
+ ptr addrspace(1) %e) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -404,7 +404,7 @@ define amdgpu_kernel void @mac_v2f16_neg_a(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -433,7 +433,7 @@ define amdgpu_kernel void @mac_v2f16_neg_b(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -466,7 +466,7 @@ define amdgpu_kernel void @mac_v2f16_neg_c(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -498,7 +498,7 @@ define amdgpu_kernel void @mac_v2f16_neg_a_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -530,7 +530,7 @@ define amdgpu_kernel void @mac_v2f16_neg_b_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -562,7 +562,7 @@ define amdgpu_kernel void @mac_v2f16_neg_c_safe_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -595,7 +595,7 @@ define amdgpu_kernel void @mac_v2f16_neg_a_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -628,7 +628,7 @@ define amdgpu_kernel void @mac_v2f16_neg_b_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -661,7 +661,7 @@ define amdgpu_kernel void @mac_v2f16_neg_c_nsz_fp_math(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #1 {
+ ptr addrspace(1) %c) nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
@@ -675,8 +675,4 @@ entry:
ret void
}
-declare void @llvm.amdgcn.s.barrier() #2
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
-attributes #2 = { nounwind convergent }
+declare void @llvm.amdgcn.s.barrier() nounwind convergent
diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
index f7933d719f9893..0ac956cc05448c 100644
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -80,7 +80,7 @@ define amdgpu_kernel void @madak_f16(
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a,
- ptr addrspace(1) %b) #0 {
+ ptr addrspace(1) %b) "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
@@ -208,7 +208,7 @@ define amdgpu_kernel void @madak_f16_use_2(
ptr addrspace(1) %r1,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
- ptr addrspace(1) %c) #0 {
+ ptr addrspace(1) %c) "denormal-fp-math"="preserve-sign,preserve-sign" {
entry:
%a.val = load volatile half, ptr addrspace(1) %a
%b.val = load volatile half, ptr addrspace(1) %b
@@ -223,5 +223,3 @@ entry:
store half %r1.val, ptr addrspace(1) %r1
ret void
}
-
-attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll
index 8bc8fbd0e0e846..d576d7454c941b 100644
--- a/llvm/test/CodeGen/AMDGPU/v_pack.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll
@@ -2,9 +2,9 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GISEL %s
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GCN-LABEL: v_pack_b32_v2f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -49,11 +49,11 @@ define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace
%vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GCN-LABEL: v_pack_b32_v2f16_sub:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrs
%vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
@@ -144,7 +144,7 @@ define amdgpu_kernel void @fptrunc(
ret void
}
-define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GCN-LABEL: v_pack_b32.fabs:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -191,11 +191,11 @@ define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(
%vec.0 = insertelement <2 x half> undef, half %v0.fabs, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %v1.fabs, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
+define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(1) %in1) nounwind {
; GCN-LABEL: v_pack_b32.fneg:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -242,12 +242,9 @@ define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(
%vec.0 = insertelement <2 x half> undef, half %v0.fneg, i32 0
%vec.1 = insertelement <2 x half> %vec.0, half %v1.fneg, i32 1
%vec.i32 = bitcast <2 x half> %vec.1 to i32
- call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vec.i32) nounwind
ret void
}
-declare half @llvm.fabs.f16(half) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare half @llvm.fabs.f16(half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 9a64a6d99f46fe..56d0989766d3d0 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; SI: [[FLOW_BB]]:
; SI-NEXT: s_andn2_saveexec_b64 [[SAVE2]], [[SAVE2]]
;
-define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) #1 {
+define amdgpu_kernel void @test_if(i32 %b, ptr addrspace(1) %src, ptr addrspace(1) %dst) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
switch i32 %tid, label %default [
@@ -69,7 +69,7 @@ end:
; SI-NEXT: {{^}}[[EXIT]]:
; SI: s_endpgm
-define amdgpu_kernel void @simple_test_v_if(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @simple_test_v_if(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%is.0 = icmp ne i32 %tid, 0
br i1 %is.0, label %then, label %exit
@@ -95,7 +95,7 @@ exit:
; SI-NEXT: {{^}}[[EXIT]]:
; SI: s_endpgm
-define amdgpu_kernel void @simple_test_v_if_ret_else_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @simple_test_v_if_ret_else_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%is.0 = icmp ne i32 %tid, 0
br i1 %is.0, label %then, label %exit
@@ -132,7 +132,7 @@ exit:
; SI-NEXT: {{^}}[[EXIT]]:
; SI: ds_write_b32
-define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%is.0 = icmp ne i32 %tid, 0
br i1 %is.0, label %then, label %exit
@@ -161,7 +161,7 @@ exit:
; SI: s_cbranch_scc1 [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]:
; SI: s_endpgm
-define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspace(1) %src) #1 {
+define amdgpu_kernel void @simple_test_v_loop(ptr addrspace(1) %dst, ptr addrspace(1) %src) nounwind {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%is.0 = icmp ne i32 %tid, 0
@@ -220,9 +220,9 @@ exit:
; SI: [[LABEL_EXIT]]:
; SI-NOT: [[COND_STATE]]
; SI: s_endpgm
-define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) #1 {
+define amdgpu_kernel void @multi_vcond_loop(ptr addrspace(1) noalias nocapture %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture readonly %arg2, ptr addrspace(1) noalias nocapture readonly %arg3) nounwind {
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp4 = sext i32 %tmp to i64
%tmp5 = getelementptr inbounds i32, ptr addrspace(1) %arg3, i64 %tmp4
%tmp6 = load i32, ptr addrspace(1) %tmp5, align 4
@@ -253,6 +253,3 @@ bb20: ; preds = %bb10
bb26: ; preds = %bb10, %bb20, %bb
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
index 64d7c7868ca8de..d8d7f3fb3a7efb 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll
@@ -8,7 +8,7 @@ target datalayout = "A5"
; OPT: <8 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <8 x i64>
-define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,1024" {
entry:
%tmp = alloca [8 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -23,7 +23,7 @@ entry:
; OPT-NOT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
-define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
+define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,1024" {
entry:
%tmp = alloca [9 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -38,7 +38,7 @@ entry:
; OPT: <16 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i64>
-define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
+define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,512" {
entry:
%tmp = alloca [16 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -53,7 +53,7 @@ entry:
; OPT-NOT: <17 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <17 x i64>
-define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
+define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,512" {
entry:
%tmp = alloca [17 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -68,7 +68,7 @@ entry:
; OPT-NOT: <9 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
-define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) #1 {
+define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,512" {
entry:
%tmp = alloca [9 x i128], addrspace(5)
store i128 0, ptr addrspace(5) %tmp
@@ -83,7 +83,7 @@ entry:
; OPT: <9 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
-define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
+define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [9 x i128], addrspace(5)
store i128 0, ptr addrspace(5) %tmp
@@ -98,7 +98,7 @@ entry:
; OPT: <16 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i128>
-define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
+define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [16 x i128], addrspace(5)
store i128 0, ptr addrspace(5) %tmp
@@ -113,7 +113,7 @@ entry:
; OPT-NOT: <9 x i256>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i256>
-define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) #2 {
+define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [9 x i256], addrspace(5)
store i256 0, ptr addrspace(5) %tmp
@@ -128,7 +128,7 @@ entry:
; OPT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
-define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
+define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [9 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -143,7 +143,7 @@ entry:
; OPT-NOT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
-define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
+define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [9 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -158,7 +158,7 @@ entry:
; OPT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
-define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #3 {
+define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) alwaysinline "amdgpu-flat-work-group-size"="1,256" {
entry:
%tmp = alloca [9 x i64], addrspace(5)
store i64 0, ptr addrspace(5) %tmp
@@ -167,8 +167,3 @@ entry:
store i64 %tmp2, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
-attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
-attributes #3 = { alwaysinline "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
index eb88c790dfe729..4b6746ea5425ee 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
@@ -4,10 +4,10 @@
; Test that when extracting the same unknown vector index from an
; insertelement the dynamic indexing is folded away.
-declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; No dynamic indexing required
-define amdgpu_kernel void @extract_insert_same_dynelt_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx) #1 {
+define amdgpu_kernel void @extract_insert_same_dynelt_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx) nounwind {
; GCN-LABEL: extract_insert_same_dynelt_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -31,7 +31,7 @@ define amdgpu_kernel void @extract_insert_same_dynelt_v4i32(ptr addrspace(1) %ou
ret void
}
-define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx0, i32 %idx1) #1 {
+define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx0, i32 %idx1) nounwind {
; GCN-LABEL: extract_insert_different_dynelt_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -84,7 +84,7 @@ define amdgpu_kernel void @extract_insert_different_dynelt_v4i32(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @extract_insert_same_elt2_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx) #1 {
+define amdgpu_kernel void @extract_insert_same_elt2_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx) nounwind {
; GCN-LABEL: extract_insert_same_elt2_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -108,7 +108,7 @@ define amdgpu_kernel void @extract_insert_same_elt2_v4i32(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @extract_insert_same_dynelt_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in, float %val, i32 %idx) #1 {
+define amdgpu_kernel void @extract_insert_same_dynelt_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in, float %val, i32 %idx) nounwind {
; GCN-LABEL: extract_insert_same_dynelt_v4f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -136,6 +136,3 @@ define amdgpu_kernel void @extract_insert_same_dynelt_v4f32(ptr addrspace(1) %ou
store float %extract, ptr addrspace(1) %gep.out
ret void
}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
index 66c49ba8b734db..0f9f1214313bb4 100644
--- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
@@ -4879,8 +4879,6 @@ define void @shuffle_v32bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %ar
ret void
}
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone speculatable }
+declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) nounwind readnone speculatable
+declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>) nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
index 040799435db4a5..1842d72c216d14 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-agpr-limit-gfx90a.ll
@@ -539,18 +539,16 @@ define internal void @use512vgprs() {
ret void
}
-define void @foo() #0 {
+define void @foo() noinline {
call void asm sideeffect "; use $0", "a"(i32 0)
ret void
}
-attributes #0 = { noinline }
-
; GCN-LABEL: {{^}}k256_w8:
; GFX90A: NumVgprs: 32
; GFX90A: NumAgprs: 32
; GFX90A: TotalNumVgprs: 64
-define amdgpu_kernel void @k256_w8() #2568 {
+define amdgpu_kernel void @k256_w8() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" {
call void @foo()
call void @use256vgprs()
ret void
@@ -560,18 +558,16 @@ define amdgpu_kernel void @k256_w8() #2568 {
; GFX90A: NumVgprs: 64
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 64
-define amdgpu_kernel void @k256_w8_no_agprs() #2568 {
+define amdgpu_kernel void @k256_w8_no_agprs() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" {
call void @use256vgprs()
ret void
}
-attributes #2568 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="8" }
-
; GCN-LABEL: {{^}}k256_w4:
; GFX90A: NumVgprs: 64
; GFX90A: NumAgprs: 64
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k256_w4() #2564 {
+define amdgpu_kernel void @k256_w4() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" {
call void @foo()
call void @use256vgprs()
ret void
@@ -581,18 +577,16 @@ define amdgpu_kernel void @k256_w4() #2564 {
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k256_w4_no_agprs() #2564 {
+define amdgpu_kernel void @k256_w4_no_agprs() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" {
call void @use256vgprs()
ret void
}
-attributes #2564 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="4" }
-
; GCN-LABEL: {{^}}k256_w2:
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 128
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k256_w2() #2562 {
+define amdgpu_kernel void @k256_w2() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" {
call void @foo()
call void @use256vgprs()
ret void
@@ -602,18 +596,16 @@ define amdgpu_kernel void @k256_w2() #2562 {
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k256_w2_no_agprs() #2562 {
+define amdgpu_kernel void @k256_w2_no_agprs() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" {
call void @use256vgprs()
ret void
}
-attributes #2562 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2" }
-
; GCN-LABEL: {{^}}k256_w1:
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 256
; GFX90A: TotalNumVgprs: 512
-define amdgpu_kernel void @k256_w1() #2561 {
+define amdgpu_kernel void @k256_w1() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1" {
call void @foo()
call void @use512vgprs()
ret void
@@ -623,18 +615,16 @@ define amdgpu_kernel void @k256_w1() #2561 {
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 256
; GFX90A: TotalNumVgprs: 512
-define amdgpu_kernel void @k256_w1_no_agprs() #2561 {
+define amdgpu_kernel void @k256_w1_no_agprs() nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1" {
call void @use512vgprs()
ret void
}
-attributes #2561 = { nounwind "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1" }
-
; GCN-LABEL: {{^}}k512_no_agprs:
; GFX90A: NumVgprs: 256
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_no_agprs() #512 {
+define amdgpu_kernel void @k512_no_agprs() nounwind "amdgpu-flat-work-group-size"="512,512" {
call void @use256vgprs()
ret void
}
@@ -643,7 +633,7 @@ define amdgpu_kernel void @k512_no_agprs() #512 {
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 128
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_call() #512 {
+define amdgpu_kernel void @k512_call() nounwind "amdgpu-flat-work-group-size"="512,512" {
call void @foo()
call void @use256vgprs()
ret void
@@ -653,7 +643,7 @@ define amdgpu_kernel void @k512_call() #512 {
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 128
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_virtual_agpr() #512 {
+define amdgpu_kernel void @k512_virtual_agpr() nounwind "amdgpu-flat-work-group-size"="512,512" {
%a0 = call i32 asm sideeffect "; def $0", "=a"()
call void @use256vgprs()
ret void
@@ -663,7 +653,7 @@ define amdgpu_kernel void @k512_virtual_agpr() #512 {
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 128
; GFX90A: TotalNumVgprs: 256
-define amdgpu_kernel void @k512_physical_agpr() #512 {
+define amdgpu_kernel void @k512_physical_agpr() nounwind "amdgpu-flat-work-group-size"="512,512" {
call void asm sideeffect "", "~{a8}" ()
call void @use256vgprs()
ret void
@@ -672,18 +662,16 @@ define amdgpu_kernel void @k512_physical_agpr() #512 {
; GCN-LABEL: {{^}}f512:
; GFX90A: NumVgprs: 12{{[0-9]}}
; GFX90A: NumAgprs: {{[1-9]}}
-define void @f512() #512 {
+define void @f512() nounwind "amdgpu-flat-work-group-size"="512,512" {
call void @use256vgprs()
ret void
}
-attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
-
; GCN-LABEL: {{^}}k1024:
; GFX90A: NumVgprs: 128
; GFX90A: NumAgprs: 0
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k1024() #1024 {
+define amdgpu_kernel void @k1024() nounwind "amdgpu-flat-work-group-size"="1024,1024" {
call void @use256vgprs()
ret void
}
@@ -692,10 +680,8 @@ define amdgpu_kernel void @k1024() #1024 {
; GFX90A: NumVgprs: 64
; GFX90A: NumAgprs: 64
; GFX90A: TotalNumVgprs: 128
-define amdgpu_kernel void @k1024_call() #1024 {
+define amdgpu_kernel void @k1024_call() nounwind "amdgpu-flat-work-group-size"="1024,1024" {
call void @foo()
call void @use256vgprs()
ret void
}
-
-attributes #1024 = { nounwind "amdgpu-flat-work-group-size"="1024,1024" }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index 0211c5111c31dd..de32d361dd435c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s --check-prefix=GFX11
-define void @vgpr_descriptor_waterfall_loop_idom_update(ptr %arg) #0 {
+define void @vgpr_descriptor_waterfall_loop_idom_update(ptr %arg) nounwind memory(argmem: write) {
; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -80,6 +80,4 @@ bb0:
br label %bb0
}
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-
-attributes #0 = { nounwind memory(argmem: write) }
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind memory(argmem: write)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index d2364a61ed6862..70a2a27683a66b 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -stop-after=si-opt-vgpr-liverange -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; a normal if-else
-define amdgpu_ps float @else1(i32 %z, float %v) #0 {
+define amdgpu_ps float @else1(i32 %z, float %v) nounwind {
; SI-LABEL: name: else1
; SI: bb.0.main_body:
; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -58,7 +58,7 @@ end:
; %v was used after if-else
-define amdgpu_ps float @else2(i32 %z, float %v) #0 {
+define amdgpu_ps float @else2(i32 %z, float %v) nounwind {
; SI-LABEL: name: else2
; SI: bb.0.main_body:
; SI-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@@ -117,7 +117,7 @@ end:
}
; if-else inside loop, %x can be optimized, but %v cannot be.
-define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
+define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) nounwind {
; SI-LABEL: name: else3
; SI: bb.0.entry:
; SI-NEXT: successors: %bb.1(0x80000000)
@@ -214,7 +214,7 @@ for.end:
}
; a loop inside an if-else
-define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
+define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) nounwind {
; SI-LABEL: name: loop
; SI: bb.0.main_body:
; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000)
@@ -337,7 +337,7 @@ end:
}
; a loop inside an if-else, but the variable is still in use after the if-else
-define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
+define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) nounwind {
; SI-LABEL: name: loop_with_use
; SI: bb.0.main_body:
; SI-NEXT: successors: %bb.6(0x40000000), %bb.1(0x40000000)
@@ -668,10 +668,6 @@ if.then: ; preds = %entry
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable willreturn
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { nounwind readonly willreturn }
+declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
index 25d8300eb45835..d074b1b1ac7193 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; a normal if-else
-define amdgpu_ps float @else1(i32 %z, float %v) #0 {
+define amdgpu_ps float @else1(i32 %z, float %v) nounwind {
; SI-LABEL: else1:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
@@ -45,7 +45,7 @@ end:
; %v was used after if-else
-define amdgpu_ps float @else2(i32 %z, float %v) #0 {
+define amdgpu_ps float @else2(i32 %z, float %v) nounwind {
; SI-LABEL: else2:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
@@ -83,7 +83,7 @@ end:
}
; if-else inside loop, %x can be optimized, but %v cannot be.
-define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
+define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) nounwind {
; SI-LABEL: else3:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0
@@ -158,7 +158,7 @@ for.end:
}
; a loop inside an if-else
-define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
+define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) nounwind {
; SI-LABEL: loop:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: v_mov_b32_e32 v6, v0
@@ -236,7 +236,7 @@ end:
}
; a loop inside an if-else, but the variable is still in use after the if-else
-define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 {
+define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) nounwind {
; SI-LABEL: loop_with_use:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
@@ -309,5 +309,3 @@ end:
%r2 = fadd float %r, %v
ret float %r2
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 65a7554bb66a53..8358f9e6e9d80f 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -62,7 +62,7 @@
; GCN: ScratchSize: 1536
; s[0:3] input user SGPRs. s4,s5,s6 = workgroup IDs. s8 scratch offset.
-define amdgpu_kernel void @spill_vgpr_compute(<4 x float> %arg6, ptr addrspace(1) %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) #0 {
+define amdgpu_kernel void @spill_vgpr_compute(<4 x float> %arg6, ptr addrspace(1) %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) nounwind {
bb:
%tmp = add i32 %arg1, %arg2
%tmp7 = extractelement <4 x float> %arg6, i32 0
@@ -203,7 +203,7 @@ bb12: ; preds = %bb145, %bb
%tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb145 ]
%tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb145 ]
%tmp142 = bitcast float %tmp95 to i32
- %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%tmp143 = icmp sgt i32 %tmp142, %tid
br i1 %tmp143, label %bb144, label %bb145
@@ -610,7 +610,4 @@ bb145: ; preds = %bb12
br label %bb12
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index 0cabfa9aea0e49..9f8ecc0c7b25fa 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -27,7 +27,7 @@
; GCN: NumVgprs: 256
; GCN: ScratchSize: 640
-define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) nounwind {
bb:
%tmp11 = load <4 x i32>, ptr addrspace(4) %arg1, align 16, !tbaa !0
%tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %tmp11, i32 0, i32 0)
@@ -175,44 +175,44 @@ bb24: ; preds = %bb157, %bb
%tmp152 = phi float [ 0.000000e+00, %bb ], [ %tmp417, %bb157 ]
%tmp153 = phi float [ 0.000000e+00, %bb ], [ %tmp418, %bb157 ]
%tmp154 = bitcast float %tmp107 to i32
- %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
+ %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) nounwind readnone
%tmp155 = icmp sgt i32 %tmp154, %tid
br i1 %tmp155, label %bb156, label %bb157
bb156: ; preds = %bb24
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp12, float %tmp103, float %tmp102, float %tmp101, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %tmp99, float %tmp98, float %tmp97, float %tmp95, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %tmp94, float %tmp93, float %tmp91, float %tmp90, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 35, i32 15, float %tmp89, float %tmp87, float %tmp86, float %tmp85, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 36, i32 15, float %tmp83, float %tmp82, float %tmp81, float %tmp79, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 37, i32 15, float %tmp78, float %tmp77, float %tmp75, float %tmp74, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 38, i32 15, float %tmp73, float %tmp71, float %tmp70, float %tmp69, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 39, i32 15, float %tmp67, float %tmp66, float %tmp65, float %tmp63, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp62, float %tmp61, float %tmp59, float %tmp58, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 41, i32 15, float %tmp57, float %tmp55, float %tmp54, float %tmp53, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 42, i32 15, float %tmp51, float %tmp50, float %tmp49, float %tmp47, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 43, i32 15, float %tmp46, float %tmp45, float %tmp43, float %tmp42, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 44, i32 15, float %tmp41, float %tmp39, float %tmp38, float %tmp37, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 45, i32 15, float %tmp35, float %tmp34, float %tmp33, float %tmp31, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 46, i32 15, float %tmp30, float %tmp29, float %tmp27, float %tmp26, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 47, i32 15, float %tmp25, float %tmp28, float %tmp32, float %tmp36, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 48, i32 15, float %tmp40, float %tmp44, float %tmp48, float %tmp52, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 49, i32 15, float %tmp56, float %tmp60, float %tmp64, float %tmp68, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 50, i32 15, float %tmp72, float %tmp76, float %tmp80, float %tmp84, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 51, i32 15, float %tmp88, float %tmp92, float %tmp96, float %tmp100, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 52, i32 15, float %tmp104, float %tmp105, float %tmp106, float %tmp108, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 53, i32 15, float %tmp109, float %tmp110, float %tmp111, float %tmp112, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 54, i32 15, float %tmp113, float %tmp114, float %tmp115, float %tmp116, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 55, i32 15, float %tmp117, float %tmp118, float %tmp119, float %tmp120, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 56, i32 15, float %tmp121, float %tmp122, float %tmp123, float %tmp124, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 57, i32 15, float %tmp125, float %tmp126, float %tmp127, float %tmp128, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 58, i32 15, float %tmp129, float %tmp130, float %tmp131, float %tmp132, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 59, i32 15, float %tmp133, float %tmp134, float %tmp135, float %tmp136, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 60, i32 15, float %tmp137, float %tmp138, float %tmp139, float %tmp140, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 61, i32 15, float %tmp141, float %tmp142, float %tmp143, float %tmp144, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 62, i32 15, float %tmp145, float %tmp146, float %tmp147, float %tmp148, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float %tmp149, float %tmp150, float %tmp151, float %tmp13, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 true, i1 false) nounwind
ret void
bb157: ; preds = %bb24
@@ -483,15 +483,11 @@ bb157: ; preds = %bb24
br label %bb24
}
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
index 2b96e10fd3cc3b..8d35f3a6e353b6 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
-declare void @extern_func() #2
+declare void @extern_func() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; The vgpr tuple8 operand in image_gather4_c_b_cl instruction needs not be
@@ -184,10 +184,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
main_body:
- call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
- call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
- call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
- call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() nounwind writeonly
+ call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() nounwind writeonly
+ call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() nounwind writeonly
+ call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind writeonly
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
call void @extern_func()
ret <4 x float> %v
@@ -360,8 +360,4 @@ main_body:
ret <4 x float> %v1
}
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind writeonly }
-attributes #1 = { nounwind readonly }
-attributes #2 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) nounwind readonly
diff --git a/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
index f99c3f8306ac1f..e7e7a254bf9073 100644
--- a/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
@@ -2,17 +2,14 @@
; ERROR: error: foo.cl:1:42: in function rsq_legacy_f32 void (ptr addrspace(1), float): intrinsic not supported on subtarget
-declare float @llvm.amdgcn.rsq.legacy(float) #0
+declare float @llvm.amdgcn.rsq.legacy(float) nounwind readnone
-define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
+define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) nounwind {
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src), !dbg !4
store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
index bfa106eb19f922..9f5c297cfe214c 100644
--- a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
@@ -10,7 +10,7 @@
define amdgpu_kernel void @sub_rev(ptr addrspace(1) %out, <4 x i32> %sgpr, i32 %cond) {
entry:
- %vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %vgpr = call i32 @llvm.amdgcn.workitem.id.x() readnone
%tmp = icmp eq i32 %cond, 0
br i1 %tmp, label %if, label %else
@@ -45,7 +45,4 @@ entry:
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { readnone }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll
index 8d88a1159e4b98..ea929c48c0ec35 100644
--- a/llvm/test/CodeGen/AMDGPU/wait.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait.ll
@@ -13,7 +13,7 @@
; DEFAULT-DAG: exp
; DEFAULT: exp
; DEFAULT-NEXT: s_endpgm
-define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(4) inreg %constptr) #0 {
+define amdgpu_vs void @main(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, ptr addrspace(4) inreg %constptr) nounwind {
main_body:
%tmp10 = load <16 x i8>, ptr addrspace(4) %arg3, !tbaa !0
%tmp10.cast.int = bitcast <16 x i8> %tmp10 to i128
@@ -21,7 +21,7 @@ main_body:
%tmp11 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %tmp10.cast, i32 %arg6, i32 0, i32 0, i32 0)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
- call void @llvm.amdgcn.s.barrier() #1
+ call void @llvm.amdgcn.s.barrier() convergent nounwind
%tmp14 = extractelement <4 x float> %tmp11, i32 2
%tmp15 = load float, ptr addrspace(4) %constptr, align 4
%tmp16 = getelementptr <16 x i8>, ptr addrspace(4) %arg3, i32 1
@@ -33,8 +33,8 @@ main_body:
%tmp20 = extractelement <4 x float> %tmp18, i32 1
%tmp21 = extractelement <4 x float> %tmp18, i32 2
%tmp22 = extractelement <4 x float> %tmp18, i32 3
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp19, float %tmp20, float %tmp21, float %tmp22, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp12, float %tmp13, float %tmp14, float %tmp15, i1 true, i1 false) nounwind
ret void
}
@@ -47,7 +47,7 @@ main_body:
; ILPMAX: exp pos0
; ILPMAX-NEXT: exp param0
; ILPMAX: s_endpgm
-define amdgpu_vs void @main2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+define amdgpu_vs void @main2(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, ptr addrspace(4) inreg %arg3, ptr addrspace(4) inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) nounwind {
main_body:
%tmp11 = load <16 x i8>, ptr addrspace(4) %arg4, align 16, !tbaa !0
%tmp12 = add i32 %arg5, %arg7
@@ -68,18 +68,14 @@ main_body:
%tmp23 = extractelement <4 x float> %tmp21, i32 1
%tmp24 = extractelement <4 x float> %tmp21, i32 2
%tmp25 = extractelement <4 x float> %tmp21, i32 3
- call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 false, i1 false) #0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 true, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp14, float %tmp15, float %tmp16, float %tmp17, i1 false, i1 false) nounwind
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %tmp22, float %tmp23, float %tmp24, float %tmp25, i1 true, i1 false) nounwind
ret void
}
-declare void @llvm.amdgcn.s.barrier() #1
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind readonly }
+declare void @llvm.amdgcn.s.barrier() convergent nounwind
+declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", !2}
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
index b32ce6eb0acc0d..66ee30da92dfcb 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
@@ -15,7 +15,7 @@
@data_generic = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4
@data_reference = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4
-define amdgpu_kernel void @testKernel(ptr addrspace(1) nocapture %arg) local_unnamed_addr #0 {
+define amdgpu_kernel void @testKernel(ptr addrspace(1) nocapture %arg) local_unnamed_addr "target-cpu"="fiji" "target-features"="-flat-for-global" {
bb:
store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr bitcast (ptr getelementptr ([100 x float], ptr addrspacecast ([100 x float] addrspace(1)* @data_generic to ptr), i64 0, i64 4) to ptr), align 4
store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr bitcast (ptr getelementptr ([100 x float], ptr addrspacecast ([100 x float] addrspace(1)* @data_reference to ptr), i64 0, i64 4) to ptr), align 4
@@ -129,16 +129,13 @@ bb18: ; preds = %bb18, %bb
}
; Function Attrs: nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() nounwind readnone speculatable
; Function Attrs: nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
-
-attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
-attributes #1 = { nounwind readnone speculatable }
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
index b94e5c450cd17f..0ad99cfa0b92d1 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir
@@ -4,11 +4,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX11 %s
--- |
- define amdgpu_kernel void @max-counter-lgkmcnt() #0 { ret void }
- define amdgpu_kernel void @max-counter-vmcnt() #0 { ret void }
- define amdgpu_kernel void @max-counter-expcnt() #0 { ret void }
-
- attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+ define amdgpu_kernel void @max-counter-lgkmcnt() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @max-counter-vmcnt() "amdgpu-flat-work-group-size"="1,256" { ret void }
+ define amdgpu_kernel void @max-counter-expcnt() "amdgpu-flat-work-group-size"="1,256" { ret void }
...
# Check that we handle cases where a counter has overflowed.
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 82816b4564e868..aee2db88007419 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -134,7 +134,7 @@ define amdgpu_kernel void @test_vopc_2xf16(ptr addrspace(1) %arg) {
ret void
}
-define amdgpu_kernel void @test_vopc_class(ptr addrspace(1) %out, float %x) #0 {
+define amdgpu_kernel void @test_vopc_class(ptr addrspace(1) %out, float %x) nounwind readnone speculatable {
; GFX1032-LABEL: test_vopc_class:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_clause 0x1
@@ -165,7 +165,7 @@ define amdgpu_kernel void @test_vopc_class(ptr addrspace(1) %out, float %x) #0 {
ret void
}
-define amdgpu_kernel void @test_vcmp_vcnd_f16(ptr addrspace(1) %out, half %x) #0 {
+define amdgpu_kernel void @test_vcmp_vcnd_f16(ptr addrspace(1) %out, half %x) nounwind readnone speculatable {
; GFX1032-LABEL: test_vcmp_vcnd_f16:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_clause 0x1
@@ -314,7 +314,7 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(ptr addrspace(1) %arg) {
ret void
}
-define amdgpu_kernel void @test_mask_if(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mask_if(ptr addrspace(1) %arg) nounwind readnone speculatable {
; GFX1032-LABEL: test_mask_if:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_u32_e32 vcc_lo, 10, v0
@@ -352,7 +352,7 @@ endif:
ret void
}
-define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) nounwind readnone speculatable {
; GFX1032-LABEL: test_loop_with_if:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -512,7 +512,7 @@ bb13:
-define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) nounwind readnone speculatable {
; GFX1032-LABEL: test_loop_with_if_else_break:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -628,7 +628,7 @@ bb8:
ret void
}
-define amdgpu_kernel void @test_addc_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 {
+define amdgpu_kernel void @test_addc_vop2b(ptr addrspace(1) %arg, i64 %arg1) nounwind readnone speculatable {
; GFX1032-LABEL: test_addc_vop2b:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -661,7 +661,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_subbrev_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 {
+define amdgpu_kernel void @test_subbrev_vop2b(ptr addrspace(1) %arg, i64 %arg1) nounwind readnone speculatable {
; GFX1032-LABEL: test_subbrev_vop2b:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -694,7 +694,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_subb_vop2b(ptr addrspace(1) %arg, i64 %arg1) #0 {
+define amdgpu_kernel void @test_subb_vop2b(ptr addrspace(1) %arg, i64 %arg1) nounwind readnone speculatable {
; GFX1032-LABEL: test_subb_vop2b:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -727,7 +727,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) nounwind readnone speculatable {
; GFX1032-LABEL: test_udiv64:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -1060,7 +1060,7 @@ bb:
ret void
}
-define amdgpu_kernel void @test_div_scale_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_div_scale_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_div_scale_f32:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1101,7 +1101,7 @@ define amdgpu_kernel void @test_div_scale_f32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @test_div_scale_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_div_scale_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_div_scale_f64:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
@@ -1144,7 +1144,7 @@ define amdgpu_kernel void @test_div_scale_f64(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) nounwind readnone speculatable {
; GFX1032-LABEL: test_mad_i64_i32:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1163,7 +1163,7 @@ define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
ret i64 %mad
}
-define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
+define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) nounwind readnone speculatable {
; GFX1032-LABEL: test_mad_u64_u32:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1258,7 +1258,7 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d
-define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %dummy) #0 {
+define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %dummy) nounwind readnone speculatable {
; GFX1032-LABEL: test_div_fmas_f32_i1_phi_vcc:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_clause 0x1
@@ -1341,7 +1341,7 @@ exit:
}
-define amdgpu_kernel void @fdiv_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_f32(ptr addrspace(1) %out, float %a, float %b) nounwind readnone speculatable {
; GFX1032-LABEL: fdiv_f32:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1443,7 +1443,7 @@ two:
ret void
}
-define amdgpu_kernel void @test_brcc_i1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i1 %val) #0 {
+define amdgpu_kernel void @test_brcc_i1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i1 %val) nounwind readnone speculatable {
; GCN-LABEL: test_brcc_i1:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s2, s[0:1], 0x34
@@ -1469,7 +1469,7 @@ end:
ret void
}
-define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) #0 {
+define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) nounwind readnone speculatable {
; GFX1032-LABEL: test_preserve_condition_undef_flag:
; GFX1032: ; %bb.0: ; %bb0
; GFX1032-NEXT: s_clause 0x1
@@ -1528,7 +1528,7 @@ bb2:
ret void
}
-define amdgpu_kernel void @test_invert_true_phi_cond_break_loop(i32 %arg) #0 {
+define amdgpu_kernel void @test_invert_true_phi_cond_break_loop(i32 %arg) nounwind readnone speculatable {
; GFX1032-LABEL: test_invert_true_phi_cond_break_loop:
; GFX1032: ; %bb.0: ; %bb
; GFX1032-NEXT: s_load_dword s0, s[0:1], 0x24
@@ -1630,7 +1630,7 @@ bb9: ; preds = %Flow
ret void
}
-define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(ptr addrspace(1) %out) #0 {
+define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(ptr addrspace(1) %out) nounwind readnone speculatable {
; GFX1032-LABEL: test_movrels_extract_neg_offset_vgpr:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: v_add_nc_u32_e32 v0, 0xfffffe00, v0
@@ -1661,14 +1661,14 @@ define amdgpu_kernel void @test_movrels_extract_neg_offset_vgpr(ptr addrspace(1)
; GFX1064-NEXT: global_store_dword v2, v0, s[0:1]
; GFX1064-NEXT: s_endpgm
entry:
- %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %id = call i32 @llvm.amdgcn.workitem.id.x() nounwind
%index = add i32 %id, -512
%value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
store i32 %value, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_set_inactive(ptr addrspace(1) %out, i32 %in) #0 {
+define amdgpu_kernel void @test_set_inactive(ptr addrspace(1) %out, i32 %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_set_inactive:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_clause 0x1
@@ -1701,7 +1701,7 @@ define amdgpu_kernel void @test_set_inactive(ptr addrspace(1) %out, i32 %in) #0
ret void
}
-define amdgpu_kernel void @test_set_inactive_64(ptr addrspace(1) %out, i64 %in) #0 {
+define amdgpu_kernel void @test_set_inactive_64(ptr addrspace(1) %out, i64 %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_set_inactive_64:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -1734,7 +1734,7 @@ define amdgpu_kernel void @test_set_inactive_64(ptr addrspace(1) %out, i64 %in)
ret void
}
-define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
+define amdgpu_ps void @test_kill_i1_terminator_float() nounwind readnone speculatable {
; GFX1032-LABEL: test_kill_i1_terminator_float:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
@@ -1758,7 +1758,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}
-define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone speculatable {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
@@ -1798,7 +1798,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
ret void
}
-define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 {
+define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_loop_vcc:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -2056,7 +2056,7 @@ endif:
}
-define amdgpu_ps <4 x float> @test_wqm1(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) #0 {
+define amdgpu_ps <4 x float> @test_wqm1(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) nounwind readnone speculatable {
; GFX1032-LABEL: test_wqm1:
; GFX1032: ; %bb.0: ; %main_body
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -2095,7 +2095,7 @@ main_body:
ret <4 x float> %tex
}
-define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) #0 {
+define amdgpu_ps float @test_wqm2(i32 inreg %idx0, i32 inreg %idx1) nounwind readnone speculatable {
; GFX1032-LABEL: test_wqm2:
; GFX1032: ; %bb.0: ; %main_body
; GFX1032-NEXT: s_mov_b32 s2, exec_lo
@@ -2290,7 +2290,7 @@ define amdgpu_ps void @test_wqm_vote(float %a) {
ret void
}
-define amdgpu_kernel void @test_branch_true() #2 {
+define amdgpu_kernel void @test_branch_true() nounwind readnone optnone noinline {
; GFX1032-LABEL: test_branch_true:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo
@@ -2333,7 +2333,7 @@ for.end: ; preds = %for.body, %entry
ret void
}
-define amdgpu_ps float @test_ps_live() #0 {
+define amdgpu_ps float @test_ps_live() nounwind readnone speculatable {
; GFX1032-LABEL: test_ps_live:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_mov_b32 s0, exec_lo
@@ -2351,7 +2351,7 @@ define amdgpu_ps float @test_ps_live() #0 {
ret float %r
}
-define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @test_vccnz_ifcvt_triangle64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind readnone speculatable {
; GFX1032-LABEL: test_vccnz_ifcvt_triangle64:
; GFX1032: ; %bb.0: ; %entry
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -2421,7 +2421,7 @@ define amdgpu_gs float @test_vgprblocks_w32_attr(float %a, float %b, float %c, f
; GCN-NEXT: v_add_f32_e32 v0, v0, v10
; GCN-NEXT: v_add_f32_e32 v0, v0, v11
; GCN-NEXT: ; return to shader part epilog
- float %f, float %g, float %h, float %i, float %j, float %k, float %l) #3 {
+ float %f, float %g, float %h, float %i, float %j, float %k, float %l) "target-features"="+wavefrontsize32" {
main_body:
%s = fadd float %a, %b
%s.1 = fadd float %s, %c
@@ -2452,7 +2452,7 @@ define amdgpu_gs float @test_vgprblocks_w64_attr(float %a, float %b, float %c, f
; GCN-NEXT: v_add_f32_e32 v0, v0, v10
; GCN-NEXT: v_add_f32_e32 v0, v0, v11
; GCN-NEXT: ; return to shader part epilog
- float %f, float %g, float %h, float %i, float %j, float %k, float %l) #4 {
+ float %f, float %g, float %h, float %i, float %j, float %k, float %l) "target-features"="+wavefrontsize64" {
main_body:
%s = fadd float %a, %b
%s.1 = fadd float %s, %c
@@ -2850,9 +2850,9 @@ if.end2: ; preds = %if.end
ret void
}
-declare void @external_void_func_void() #1
+declare void @external_void_func_void() nounwind
-define void @callee_no_stack_with_call() #1 {
+define void @callee_no_stack_with_call() nounwind {
; GFX1032-LABEL: callee_no_stack_with_call:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2947,13 +2947,6 @@ declare i1 @llvm.amdgcn.wqm.vote(i1)
declare i1 @llvm.amdgcn.ps.live()
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
-
-attributes #0 = { nounwind readnone speculatable }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone optnone noinline }
-attributes #3 = { "target-features"="+wavefrontsize32" }
-attributes #4 = { "target-features"="+wavefrontsize64" }
-attributes #5 = { inaccessiblememonly nounwind }
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) inaccessiblememonly nounwind
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10DEFWAVE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/while-break.ll b/llvm/test/CodeGen/AMDGPU/while-break.ll
index 13b37b40ee95c0..ffcbbd154bb18e 100644
--- a/llvm/test/CodeGen/AMDGPU/while-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/while-break.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
-define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
+define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) nounwind {
; GCN-LABEL: while_break:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
@@ -76,7 +76,7 @@ end:
}
; Just different dfs order from while_break.
-define amdgpu_ps float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 {
+define amdgpu_ps float @while_break2(i32 %z, float %v, i32 %x, i32 %y) nounwind {
; GCN-LABEL: while_break2:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
@@ -151,5 +151,3 @@ end:
%r = phi float [ %v.2, %latch ], [ %v.1, %else ]
ret float %r
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
index f680bbdd05cdd2..3ef56bdc801627 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
@@ -6,7 +6,7 @@
; by introducing a copy to AGPR register. The VGPR store to AGPR (v_accvgpr_write_b32) and later the
; restore from AGPR (v_accvgpr_read_b32) should be whole-wave operations and hence exec mask should be
; manipulated to ensure all lanes are active when these instructions are executed.
-define void @vector_reg_liverange_split() #0 {
+define void @vector_reg_liverange_split() "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34" {
; GFX90A-LABEL: vector_reg_liverange_split:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -68,5 +68,3 @@ define void @vector_reg_liverange_split() #0 {
}
declare void @foo()
-
-attributes #0 = { "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34"}
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
index 3a33194f17c875..8acd6d314f0b8d 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -12,7 +12,7 @@
; must spill a scratch VGPR. The writelane/readlane instructions that spill/restore SGPRs into/from VGPR
; are whole-wave operations and hence the VGPRs involved in such operations require whole-wave spilling.
-define void @test() #0 {
+define void @test() nounwind "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34" {
; GCN-LABEL: test:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -130,12 +130,10 @@ define void @test() #0 {
; GCN-O0-NEXT: s_mov_b32 s33, s4
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: s_setpc_b64 s[30:31]
- %sgpr = call i32 asm sideeffect "; def $0", "=s" () #0
+ %sgpr = call i32 asm sideeffect "; def $0", "=s" () nounwind "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34"
call void @ext_func()
store volatile i32 %sgpr, ptr addrspace(1) undef
ret void
}
declare void @ext_func();
-
-attributes #0 = { nounwind "amdgpu-num-vgpr"="41" "amdgpu-num-sgpr"="34"}
diff --git a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
index 186f50480a34d4..bc844b3d5990ff 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
@@ -9,7 +9,7 @@
; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
; GCN: v_cmp_lt_i64_e32 vcc, -1, v[[[VSEL]]:[[VSEL_EXT]]]
-define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
+define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) nounwind {
bb:
%tmp = extractelement <4 x double> %arg, i64 0
%tmp1 = fcmp uno double %tmp, 0.000000e+00
@@ -31,7 +31,7 @@ bb:
; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
; GCN: v_cmp_lt_i64_e32 vcc, -1, v[[[VSEL]]:[[VSEL_EXT]]]
-define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
+define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) nounwind {
bb:
%tmp = extractelement <4 x i64> %arg, i64 0
%tmp1 = icmp eq i64 %tmp, 0
@@ -47,6 +47,3 @@ bb:
store <4 x i64> %tmp10, ptr addrspace(1) null, align 32
ret void
}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
index 24c1875159f673..40ad1d222a0a2e 100644
--- a/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() nounwind
-define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i1(
; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
@@ -14,7 +14,7 @@ define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i1_align2(
; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -25,7 +25,7 @@ define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i1_align4(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i1
@@ -37,7 +37,7 @@ define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i8(
; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 1
; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1
@@ -48,7 +48,7 @@ define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace
ret void
}
-define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i8_align2(
; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -59,7 +59,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
ret void
}
-define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i8align4(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
@@ -71,7 +71,7 @@ define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr add
ret void
}
-define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v2i8(
; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -82,7 +82,7 @@ define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v2i8_align4(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -95,7 +95,7 @@ define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v3i8(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
@@ -108,7 +108,7 @@ define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspa
ret void
}
-define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v3i8_align4(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24
@@ -121,7 +121,7 @@ define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr
ret void
}
-define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16(
; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: [[EXT:%.*]] = sext i16 [[LD]] to i32
@@ -134,7 +134,7 @@ define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -148,7 +148,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr a
ret void
}
-define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_f16(
; OPT-NEXT: [[LD:%.*]] = load half, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: store half [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -159,7 +159,7 @@ define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspac
ret void
}
-define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v2f16(
; OPT-NEXT: [[LD:%.*]] = load <2 x half>, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: store <2 x half> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
@@ -181,7 +181,7 @@ define amdgpu_kernel void @load_volatile(ptr addrspace(1) %out, ptr addrspace(4)
ret void
}
-define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_v2i8_volatile(
; OPT-NEXT: [[LD:%.*]] = load volatile <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2
; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -192,7 +192,7 @@ define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, pt
ret void
}
-define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; OPT-LABEL: @constant_load_v2i8_addrspace1(
; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(1) [[IN:%.*]], align 2
; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -219,7 +219,7 @@ define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_range(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0:![0-9]+]]
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -233,7 +233,7 @@ define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out,
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_range_max(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0]]
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -247,7 +247,7 @@ define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_complex_range(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG1:![0-9]+]]
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -261,7 +261,7 @@ define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -275,7 +275,7 @@ define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2:![0-9]+]]
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -289,7 +289,7 @@ define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2]]
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -303,7 +303,7 @@ define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addr
ret void
}
-define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
+define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) nounwind {
; OPT-LABEL: @constant_load_i16_align4_invariant(
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !invariant.load !3
; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
@@ -317,8 +317,6 @@ define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %
ret void
}
-attributes #0 = { nounwind }
-
; OPT: !0 = !{i32 5, i32 0}
; OPT: !1 = !{i32 8, i32 0}
; OPT: !2 = !{i32 65520, i32 0}
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll b/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
index d7ce562292b869..1d8a6b6fda230b 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
@@ -18,9 +18,9 @@ define amdgpu_ps <3 x float> @test_param_load(i32 inreg %attr, <3 x float> %to_a
; CHECK-NEXT: v_add_f32_e32 v2, v5, v2
; CHECK-NEXT: ; return to shader part epilog
main_body:
- %a = call float @llvm.amdgcn.lds.param.load(i32 immarg 0, i32 immarg 0, i32 %attr) #1
- %b = call float @llvm.amdgcn.lds.param.load(i32 immarg 1, i32 immarg 0, i32 %attr) #1
- %c = call float @llvm.amdgcn.lds.param.load(i32 immarg 2, i32 immarg 0, i32 %attr) #1
+ %a = call float @llvm.amdgcn.lds.param.load(i32 immarg 0, i32 immarg 0, i32 %attr) nounwind readnone speculatable willreturn
+ %b = call float @llvm.amdgcn.lds.param.load(i32 immarg 1, i32 immarg 0, i32 %attr) nounwind readnone speculatable willreturn
+ %c = call float @llvm.amdgcn.lds.param.load(i32 immarg 2, i32 immarg 0, i32 %attr) nounwind readnone speculatable willreturn
%tmp_0 = insertelement <3 x float> undef, float %a, i32 0
%tmp_1 = insertelement <3 x float> %tmp_0, float %b, i32 1
%tmp_2 = insertelement <3 x float> %tmp_1, float %c, i32 2
@@ -47,16 +47,14 @@ define amdgpu_ps <3 x float> @test_direct_load(i32 inreg %arg_0, i32 inreg %arg_
; CHECK-NEXT: v_add_f32_e32 v2, v5, v2
; CHECK-NEXT: ; return to shader part epilog
main_body:
- %a = call float @llvm.amdgcn.lds.direct.load(i32 %arg_0) #1
- %b = call float @llvm.amdgcn.lds.direct.load(i32 %arg_1) #1
- %c = call float @llvm.amdgcn.lds.direct.load(i32 %arg_2) #1
+ %a = call float @llvm.amdgcn.lds.direct.load(i32 %arg_0) nounwind readnone speculatable willreturn
+ %b = call float @llvm.amdgcn.lds.direct.load(i32 %arg_1) nounwind readnone speculatable willreturn
+ %c = call float @llvm.amdgcn.lds.direct.load(i32 %arg_2) nounwind readnone speculatable willreturn
%tmp_0 = insertelement <3 x float> undef, float %a, i32 0
%tmp_1 = insertelement <3 x float> %tmp_0, float %b, i32 1
%tmp_2 = insertelement <3 x float> %tmp_1, float %c, i32 2
%res = fadd <3 x float> %tmp_2, %to_add
ret <3 x float> %res
}
-
-attributes #1 = { nounwind readnone speculatable willreturn }
-declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #1
-declare float @llvm.amdgcn.lds.direct.load(i32) #1
+declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) nounwind readnone speculatable willreturn
+declare float @llvm.amdgcn.lds.direct.load(i32) nounwind readnone speculatable willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 95dfb12c8dbaec..aa7b6f12fba3b7 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -27,7 +27,7 @@ main_body:
}
; Check that WQM is triggered by code calculating inputs to image samples and is disabled as soon as possible
-define amdgpu_ps <4 x float> @test2(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) #6 {
+define amdgpu_ps <4 x float> @test2(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) nounwind "InitialPSInputAddr"="2" {
; GFX9-W64-LABEL: test2:
; GFX9-W64: ; %bb.0: ; %main_body
; GFX9-W64-NEXT: s_mov_b64 s[0:1], exec
@@ -131,7 +131,7 @@ main_body:
}
; ... and disabled for export.
-define amdgpu_ps void @test3x(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) #6 {
+define amdgpu_ps void @test3x(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <2 x float> %pos) nounwind "InitialPSInputAddr"="2" {
; GFX9-W64-LABEL: test3x:
; GFX9-W64: ; %bb.0: ; %main_body
; GFX9-W64-NEXT: s_mov_b64 s[0:1], exec
@@ -1906,7 +1906,7 @@ main_body:
}
; Check prolog shaders.
-define amdgpu_ps float @test_prolog_1(float %a, float %b) #5 {
+define amdgpu_ps float @test_prolog_1(float %a, float %b) "amdgpu-ps-wqm-outputs" {
; GFX9-W64-LABEL: test_prolog_1:
; GFX9-W64: ; %bb.0: ; %main_body
; GFX9-W64-NEXT: s_mov_b64 s[0:1], exec
@@ -2176,7 +2176,7 @@ else:
}
; Test awareness that s_wqm_b64 clobbers SCC.
-define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
+define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) nounwind {
; GFX9-W64-LABEL: test_scc:
; GFX9-W64: ; %bb.0: ; %main_body
; GFX9-W64-NEXT: s_mov_b64 s[2:3], exec
@@ -3306,56 +3306,48 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
main_body:
%1 = ptrtoint ptr addrspace(6) %0 to i32
%2 = insertelement <4 x i32> <i32 poison, i32 32768, i32 32, i32 822177708>, i32 %1, i32 0
- %3 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %2, i32 0, i32 0) #3
+ %3 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %2, i32 0, i32 0) nounwind readnone
%4 = fcmp nsz arcp ugt float %3, 0.000000e+00
- call void @llvm.amdgcn.kill(i1 %4) #1
+ call void @llvm.amdgcn.kill(i1 %4) nounwind
ret void
}
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #3
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #3
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #3
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #3
-
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare float @llvm.amdgcn.image.sample.1d.f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare void @llvm.amdgcn.kill(i1) #1
-declare float @llvm.amdgcn.wqm.f32(float) #3
-declare i32 @llvm.amdgcn.wqm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wwm.f32(float) #3
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #3
-declare float @llvm.amdgcn.wwm.f32(float) #3
-declare i32 @llvm.amdgcn.wwm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wqm.f32(float) #3
-declare i32 @llvm.amdgcn.strict.wqm.i32(i32) #3
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #4
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) nounwind
+
+declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) nounwind readnone
+
+declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) nounwind readonly
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) nounwind readnone
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare float @llvm.amdgcn.image.sample.1d.f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) nounwind readnone
+declare void @llvm.amdgcn.kill(i1) nounwind
+declare float @llvm.amdgcn.wqm.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.wqm.i32(i32) nounwind readnone
+declare float @llvm.amdgcn.strict.wwm.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.strict.wwm.i32(i32) nounwind readnone
+declare float @llvm.amdgcn.wwm.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.wwm.i32(i32) nounwind readnone
+declare float @llvm.amdgcn.strict.wqm.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.strict.wqm.i32(i32) nounwind readnone
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) nounwind readnone convergent
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) nounwind readnone
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) nounwind readnone
+declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) nounwind readonly
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) nounwind readonly
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #7
-
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind readnone }
-attributes #4 = { nounwind readnone convergent }
-attributes #5 = { "amdgpu-ps-wqm-outputs" }
-attributes #6 = { nounwind "InitialPSInputAddr"="2" }
-attributes #7 = { nounwind readnone willreturn }
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
diff --git a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
index de3b1d5bf78b35..5f5448b32dc96c 100644
--- a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -6,19 +6,15 @@
; vgpr value into a scalar register, but I don't think there's much we
; can do to avoid this.
-declare void @llvm.write_register.i32(metadata, i32) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.wave.barrier() #2
+declare void @llvm.write_register.i32(metadata, i32) nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+declare void @llvm.amdgcn.wave.barrier() convergent nounwind
define amdgpu_kernel void @write_vgpr_into_sgpr() {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
call void @llvm.write_register.i32(metadata !0, i32 %tid)
- call void @llvm.amdgcn.wave.barrier() #2
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { convergent nounwind }
-
!0 = !{!"exec_lo"}
diff --git a/llvm/test/CodeGen/AMDGPU/write_register.ll b/llvm/test/CodeGen/AMDGPU/write_register.ll
index f6ac26e8ecfc66..17268a615dfc06 100644
--- a/llvm/test/CodeGen/AMDGPU/write_register.ll
+++ b/llvm/test/CodeGen/AMDGPU/write_register.ll
@@ -1,15 +1,15 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
-declare void @llvm.write_register.i32(metadata, i32) #0
-declare void @llvm.write_register.i64(metadata, i64) #0
+declare void @llvm.write_register.i32(metadata, i32) nounwind
+declare void @llvm.write_register.i64(metadata, i64) nounwind
; CHECK-LABEL: {{^}}test_write_m0:
-define amdgpu_kernel void @test_write_m0(i32 %val) #0 {
+define amdgpu_kernel void @test_write_m0(i32 %val) nounwind {
call void @llvm.write_register.i32(metadata !0, i32 0)
call void @llvm.write_register.i32(metadata !0, i32 -1)
call void @llvm.write_register.i32(metadata !0, i32 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
@@ -17,82 +17,79 @@ define amdgpu_kernel void @test_write_m0(i32 %val) #0 {
; CHECK: s_mov_b64 exec, 0
; CHECK: s_mov_b64 exec, -1
; CHECK: s_mov_b64 exec, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_write_exec(i64 %val) #0 {
+define amdgpu_kernel void @test_write_exec(i64 %val) nounwind {
call void @llvm.write_register.i64(metadata !1, i64 0)
call void @llvm.write_register.i64(metadata !1, i64 -1)
call void @llvm.write_register.i64(metadata !1, i64 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_flat_scratch_0:
; CHECK: s_mov_b64 flat_scratch, 0
-define amdgpu_kernel void @test_write_flat_scratch_0(i64 %val) #0 {
+define amdgpu_kernel void @test_write_flat_scratch_0(i64 %val) nounwind {
call void @llvm.write_register.i64(metadata !2, i64 0)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_flat_scratch_neg1:
; CHECK: s_mov_b64 flat_scratch, -1
-define amdgpu_kernel void @test_write_flat_scratch_neg1(i64 %val) #0 {
+define amdgpu_kernel void @test_write_flat_scratch_neg1(i64 %val) nounwind {
call void @llvm.write_register.i64(metadata !2, i64 -1)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_flat_scratch_val:
; CHECK: s_load_dwordx2 flat_scratch, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @test_write_flat_scratch_val(i64 %val) #0 {
+define amdgpu_kernel void @test_write_flat_scratch_val(i64 %val) nounwind {
call void @llvm.write_register.i64(metadata !2, i64 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_flat_scratch_lo:
; CHECK: s_mov_b32 flat_scratch_lo, 0
; CHECK: s_mov_b32 flat_scratch_lo, s{{[0-9]+}}
-define amdgpu_kernel void @test_write_flat_scratch_lo(i32 %val) #0 {
+define amdgpu_kernel void @test_write_flat_scratch_lo(i32 %val) nounwind {
call void @llvm.write_register.i32(metadata !3, i32 0)
call void @llvm.write_register.i32(metadata !3, i32 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_flat_scratch_hi:
; CHECK: s_mov_b32 flat_scratch_hi, 0
; CHECK: s_mov_b32 flat_scratch_hi, s{{[0-9]+}}
-define amdgpu_kernel void @test_write_flat_scratch_hi(i32 %val) #0 {
+define amdgpu_kernel void @test_write_flat_scratch_hi(i32 %val) nounwind {
call void @llvm.write_register.i32(metadata !4, i32 0)
call void @llvm.write_register.i32(metadata !4, i32 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_exec_lo:
; CHECK: s_mov_b32 exec_lo, 0
; CHECK: s_mov_b32 exec_lo, s{{[0-9]+}}
-define amdgpu_kernel void @test_write_exec_lo(i32 %val) #0 {
+define amdgpu_kernel void @test_write_exec_lo(i32 %val) nounwind {
call void @llvm.write_register.i32(metadata !5, i32 0)
call void @llvm.write_register.i32(metadata !5, i32 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
; CHECK-LABEL: {{^}}test_write_exec_hi:
; CHECK: s_mov_b32 exec_hi, 0
; CHECK: s_mov_b32 exec_hi, s{{[0-9]+}}
-define amdgpu_kernel void @test_write_exec_hi(i32 %val) #0 {
+define amdgpu_kernel void @test_write_exec_hi(i32 %val) nounwind {
call void @llvm.write_register.i32(metadata !6, i32 0)
call void @llvm.write_register.i32(metadata !6, i32 %val)
- call void @llvm.amdgcn.wave.barrier() #1
+ call void @llvm.amdgcn.wave.barrier() convergent nounwind
ret void
}
-declare void @llvm.amdgcn.wave.barrier() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
+declare void @llvm.amdgcn.wave.barrier() convergent nounwind
!0 = !{!"m0"}
!1 = !{!"exec"}
diff --git a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
index bdfa89d9f3044f..7c49b45cb800ca 100644
--- a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
+++ b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
@@ -4,30 +4,30 @@
;CHECK: {{^}}fill3d:
;CHECK-NOT: MULLO_INT T[0-9]+
-define amdgpu_kernel void @fill3d(ptr addrspace(1) nocapture %out) #0 {
+define amdgpu_kernel void @fill3d(ptr addrspace(1) nocapture %out) nounwind {
entry:
- %x.i = tail call i32 @llvm.r600.read.global.size.x() #1
- %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1
+ %x.i = tail call i32 @llvm.r600.read.global.size.x() nounwind readnone
+ %y.i18 = tail call i32 @llvm.r600.read.global.size.y() nounwind readnone
%mul = mul i32 %y.i18, %x.i
- %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1
+ %z.i17 = tail call i32 @llvm.r600.read.global.size.z() nounwind readnone
%mul3 = mul i32 %mul, %z.i17
- %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
- %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+ %x.i.i = tail call i32 @llvm.r600.read.tgid.x() nounwind readnone
+ %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() nounwind readnone
%mul26.i = mul i32 %x.i12.i, %x.i.i
- %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
+ %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
%add.i16 = add i32 %x.i4.i, %mul26.i
%mul7 = mul i32 %add.i16, %y.i18
- %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
- %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
+ %y.i.i = tail call i32 @llvm.r600.read.tgid.y() nounwind readnone
+ %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() nounwind readnone
%mul30.i = mul i32 %y.i14.i, %y.i.i
- %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
+ %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() nounwind readnone
%add.i14 = add i32 %mul30.i, %mul7
%mul819 = add i32 %add.i14, %y.i6.i
%add = mul i32 %mul819, %z.i17
- %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
- %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
+ %z.i.i = tail call i32 @llvm.r600.read.tgid.z() nounwind readnone
+ %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() nounwind readnone
%mul33.i = mul i32 %z.i16.i, %z.i.i
- %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
+ %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() nounwind readnone
%add.i = add i32 %z.i8.i, %mul33.i
%add13 = add i32 %add.i, %add
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %add13
@@ -36,43 +36,40 @@ entry:
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.r600.read.tgid.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.r600.read.tgid.y() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.z() #1
+declare i32 @llvm.r600.read.tgid.z() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #1
+declare i32 @llvm.r600.read.local.size.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.y() #1
+declare i32 @llvm.r600.read.local.size.y() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.z() #1
+declare i32 @llvm.r600.read.local.size.z() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.r600.read.tidig.y() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.z() #1
+declare i32 @llvm.r600.read.tidig.z() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.x() #1
+declare i32 @llvm.r600.read.global.size.x() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.y() #1
+declare i32 @llvm.r600.read.global.size.y() nounwind readnone
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.z() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+declare i32 @llvm.r600.read.global.size.z() nounwind readnone
!opencl.kernels = !{!0, !1, !2}
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index 11f6a2960776b0..ee1ecb5950541e 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -919,7 +919,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
}
; FIXME: This spills v40 and v41 twice, once in whole-wave-mode and once for the active lanes
-define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, ptr addrspace(5) %ptr, i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) #0 {
+define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, ptr addrspace(5) %ptr, i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) "amdgpu-waves-per-eu"="5,5" {
; GFX9-O0-LABEL: strict_wwm_callee_saves:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1404,5 +1404,3 @@ declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
-
-attributes #0 = { "amdgpu-waves-per-eu"="5,5" }
diff --git a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-any.ll b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-any.ll
index b7da3b77c96371..b67d75f2e5eda3 100644
--- a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-any.ll
+++ b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-any.ll
@@ -10,8 +10,6 @@
; NOT-SUPPORTED: xnack setting for subtarget: Unsupported
; ANY: xnack setting for subtarget: Any
-define void @xnack-subtarget-feature-any() #0 {
+define void @xnack-subtarget-feature-any() nounwind {
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-disabled.ll b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-disabled.ll
index 23baeabc6a1bb4..701a694130c225 100644
--- a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-disabled.ll
+++ b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-disabled.ll
@@ -11,8 +11,6 @@
; WARN: warning: xnack 'Off' was requested for a processor that does not support it!
; OFF: xnack setting for subtarget: Off
-define void @xnack-subtarget-feature-disabled() #0 {
+define void @xnack-subtarget-feature-disabled() "target-features"="-xnack" {
ret void
}
-
-attributes #0 = { "target-features"="-xnack" }
diff --git a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-enabled.ll b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-enabled.ll
index a52c842afb291f..868cff0f580621 100644
--- a/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-enabled.ll
+++ b/llvm/test/CodeGen/AMDGPU/xnack-subtarget-feature-enabled.ll
@@ -10,8 +10,6 @@
; WARN: warning: xnack 'On' was requested for a processor that does not support it!
; ON: xnack setting for subtarget: On
-define void @xnack-subtarget-feature-enabled() #0 {
+define void @xnack-subtarget-feature-enabled() "target-features"="+xnack" {
ret void
}
-
-attributes #0 = { "target-features"="+xnack" }
diff --git a/llvm/test/CodeGen/AMDGPU/zero_extend.ll b/llvm/test/CodeGen/AMDGPU/zero_extend.ll
index f9137b075e4622..5a6a8f0da7dc40 100644
--- a/llvm/test/CodeGen/AMDGPU/zero_extend.ll
+++ b/llvm/test/CodeGen/AMDGPU/zero_extend.ll
@@ -9,7 +9,7 @@
; GCN: {{^}}s_mad_zext_i32_to_i64:
; GCN: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}
; GCN: buffer_store_dwordx2 v[0:[[V_ZERO]]]
-define amdgpu_kernel void @s_mad_zext_i32_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) #0 {
+define amdgpu_kernel void @s_mad_zext_i32_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) nounwind {
entry:
%tmp0 = mul i32 %a, %b
%tmp1 = add i32 %tmp0, %c
@@ -20,7 +20,7 @@ entry:
; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i32
; GCN: v_cndmask_b32
-define amdgpu_kernel void @s_cmp_zext_i1_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_cmp_zext_i1_to_i32(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
entry:
%tmp0 = icmp eq i32 %a, %b
%tmp1 = zext i1 %tmp0 to i32
@@ -29,7 +29,7 @@ entry:
}
; GCN-LABEL: {{^}}s_arg_zext_i1_to_i64:
-define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroext %arg) #0 {
+define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroext %arg) nounwind {
%ext = zext i1 %arg to i64
store i64 %ext, ptr addrspace(1) %out, align 8
ret void
@@ -39,7 +39,7 @@ define amdgpu_kernel void @s_arg_zext_i1_to_i64(ptr addrspace(1) %out, i1 zeroex
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0
; GCN-DAG: s_cmp_eq_u32
; GCN: v_cndmask_b32
-define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%ext = zext i1 %cmp to i64
store i64 %ext, ptr addrspace(1) %out, align 8
@@ -57,11 +57,9 @@ define amdgpu_kernel void @s_cmp_zext_i1_to_i64(ptr addrspace(1) %out, i32 %a, i
; GCN: s_cselect_b64 [[CC:s\[[0-9:]+\]]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN: buffer_store_short [[RESULT]]
-define amdgpu_kernel void @s_cmp_zext_i1_to_i16(ptr addrspace(1) %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) #0 {
+define amdgpu_kernel void @s_cmp_zext_i1_to_i16(ptr addrspace(1) %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) nounwind {
%tmp0 = icmp eq i16 %a, %b
%tmp1 = zext i1 %tmp0 to i16
store i16 %tmp1, ptr addrspace(1) %out
ret void
}
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
index f9a7e887ada239..1151d8aee72b42 100644
--- a/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
@@ -86,7 +86,4 @@ define amdgpu_kernel void @zext_i16_to_i64_divergent(ptr addrspace(1) %out, i16
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone speculatable }
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
diff --git a/llvm/test/CodeGen/AMDGPU/zext-lid.ll b/llvm/test/CodeGen/AMDGPU/zext-lid.ll
index 6fea05d8d7406e..d0eaa5b7932a6c 100644
--- a/llvm/test/CodeGen/AMDGPU/zext-lid.ll
+++ b/llvm/test/CodeGen/AMDGPU/zext-lid.ll
@@ -3,7 +3,7 @@
; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32
-define amdgpu_kernel void @zext_grp_size_128(ptr addrspace(1) nocapture %arg) #0 {
+define amdgpu_kernel void @zext_grp_size_128(ptr addrspace(1) nocapture %arg) nounwind "amdgpu-flat-work-group-size"="64,128" {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 127
@@ -21,7 +21,7 @@ bb:
; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32
-define amdgpu_kernel void @zext_grp_size_32x4x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !0 {
+define amdgpu_kernel void @zext_grp_size_32x4x1(ptr addrspace(1) nocapture %arg) nounwind "amdgpu-flat-work-group-size"="64,128" !reqd_work_group_size !0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 31
@@ -42,7 +42,7 @@ bb:
; When EarlyCSE is not run this call produces a range max with 0 active bits,
; which is a special case as an AssertZext from width 0 is invalid.
-define amdgpu_kernel void @zext_grp_size_1x1x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !1 {
+define amdgpu_kernel void @zext_grp_size_1x1x1(ptr addrspace(1) nocapture %arg) nounwind "amdgpu-flat-work-group-size"="64,128" !reqd_work_group_size !1 {
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 1
store i32 %tmp1, ptr addrspace(1) %arg, align 4
@@ -51,7 +51,7 @@ define amdgpu_kernel void @zext_grp_size_1x1x1(ptr addrspace(1) nocapture %arg)
; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32
-define amdgpu_kernel void @zext_grp_size_512(ptr addrspace(1) nocapture %arg) #1 {
+define amdgpu_kernel void @zext_grp_size_512(ptr addrspace(1) nocapture %arg) nounwind "amdgpu-flat-work-group-size"="512,512" {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 65535
@@ -71,7 +71,7 @@ bb:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
-define void @func_test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) #0 {
+define void @func_test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) nounwind "amdgpu-flat-work-group-size"="64,128" {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%and = and i32 %id, 1023
@@ -83,7 +83,7 @@ entry:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
-define void @func_test_workitem_id_x_default_range(ptr addrspace(1) nocapture %out) #4 {
+define void @func_test_workitem_id_x_default_range(ptr addrspace(1) nocapture %out) nounwind {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%and = and i32 %id, 1023
@@ -91,17 +91,11 @@ entry:
ret void
}
-declare i32 @llvm.amdgcn.workitem.id.x() #2
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #2
+declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.z() #2
-
-attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
-attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
-attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { nounwind readnone }
-attributes #4 = { nounwind }
+declare i32 @llvm.amdgcn.workitem.id.z() nounwind readnone speculatable
!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}
More information about the llvm-commits
mailing list